diff --git a/docs/my-website/docs/tutorials/compare_llms.md b/docs/my-website/docs/tutorials/compare_llms.md
new file mode 100644
index 0000000000..c4c73b174e
--- /dev/null
+++ b/docs/my-website/docs/tutorials/compare_llms.md
@@ -0,0 +1,123 @@
+import Image from '@theme/IdealImage';
+
+# Comparing LLMs on a Test Set using LiteLLM
+
+
+
+
+LiteLLM allows you to use any LLM as a drop-in replacement for
+`gpt-3.5-turbo`
+
+This notebook walks through how you can compare gpt-3.5-turbo vs claude-2 on a
+given test set using LiteLLM
+
+## Output at the end of this tutorial:
+
+
+
+
+
+
+
+``` python
+!pip install litellm
+```
+
+
+
+
+
+``` python
+from litellm import completion
+import litellm
+
+# init your test set questions
+questions = [
+ "how do i call completion() using LiteLLM",
+ "does LiteLLM support VertexAI",
+ "how do I set my keys on replicate llama2?",
+]
+
+
+# set your prompt
+prompt = """
+You are a coding assistant helping users using litellm.
+litellm is a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints. It manages:
+
+"""
+```
+
+
+
+
+
+``` python
+import os
+os.environ['OPENAI_API_KEY'] = ""
+os.environ['ANTHROPIC_API_KEY'] = ""
+```
+
+
+
+
+
+
+
+
+
+## Calling gpt-3.5-turbo and claude-2 on the same questions
+
+## LiteLLM `completion()` allows you to call all LLMs in the same format
+
+
+
+
+
+``` python
+results = [] # for storing results
+
+models = ['gpt-3.5-turbo', 'claude-2'] # define what models you're testing, see: https://docs.litellm.ai/docs/completion/supported
+for question in questions:
+ row = [question]
+ for model in models:
+ print("Calling:", model, "question:", question)
+ response = completion( # using litellm.completion
+ model=model,
+ messages=[
+ {'role': 'system', 'content': prompt},
+ {'role': 'user', 'content': question}
+ ]
+ )
+ answer = response.choices[0].message['content']
+ row.append(answer)
+    print("Calling:", model, "answer:", answer)
+
+ results.append(row) # save results
+
+```
+
+
+
+
+
+## Visualizing Results
+
+
+
+
+
+``` python
+# Create a table to visualize results
+import pandas as pd
+
+columns = ['Question'] + models
+df = pd.DataFrame(results, columns=columns)
+
+df
+```
+## Output Table
+
+
+
diff --git a/docs/my-website/img/compare_llms.png b/docs/my-website/img/compare_llms.png
new file mode 100644
index 0000000000..704489b035
Binary files /dev/null and b/docs/my-website/img/compare_llms.png differ
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 764b015220..78ad0e1806 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -51,7 +51,8 @@ const sidebars = {
      items: [
        'tutorials/huggingface_tutorial',
        'tutorials/TogetherAI_liteLLM',
        'tutorials/fallbacks',
+        'tutorials/compare_llms',
'tutorials/finetuned_chat_gpt',
'tutorials/text_completion',
'tutorials/litellm_Test_Multiple_Providers',