diff --git a/docs/my-website/docs/tutorials/compare_llms.md b/docs/my-website/docs/tutorials/compare_llms.md
index c4c73b174..2307758c0 100644
--- a/docs/my-website/docs/tutorials/compare_llms.md
+++ b/docs/my-website/docs/tutorials/compare_llms.md
@@ -1,123 +1,255 @@
-import Image from '@theme/IdealImage';
+# Benchmark Llama2, Claude1.2, GPT3.5 for a use case {#litellm---benchmark-llama2-claude12-and-gpt35-for-a-use-case}
-# Comparing LLMs on a Test Set using LiteLLM
+In this notebook, for a given use case, we run the same question on 3 LLMs and compare:
+- LLM Response
+- Response Time
+- Response Cost
-
+## Sample output for a question
-LiteLLM allows you to use any LLM as a drop in replacement for
-`gpt-3.5-turbo`
-
-This notebook walks through how you can compare GPT-4 vs Claude-2 on a
-given test set using litellm
-
-## Output at the end of this tutorial:
-
-
-
-
-
-
+
``` python
!pip install litellm
```
-
+## Example Use Case 1 - Code Generator
-
-
-``` python
-from litellm import completion
-import litellm
-
-# init your test set questions
-questions = [
- "how do i call completion() using LiteLLM",
- "does LiteLLM support VertexAI",
- "how do I set my keys on replicate llama2?",
-]
-
-
-# set your prompt
-prompt = """
+### For this use case enter your system prompt and questions
+```` python
+# enter your system prompt if you have one
+system_prompt = """
You are a coding assistant helping users using litellm.
-litellm is a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints. It manages:
+litellm is a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints
+--
+Sample Usage:
+```
+pip install litellm
+from litellm import completion
+## set ENV variables
+os.environ["OPENAI_API_KEY"] = "openai key"
+os.environ["COHERE_API_KEY"] = "cohere key"
+messages = [{ "content": "Hello, how are you?","role": "user"}]
+# openai call
+response = completion(model="gpt-3.5-turbo", messages=messages)
+# cohere call
+response = completion("command-nightly", messages)
+```
"""
-```
-
-
+# questions/logs you want to run the LLM on
+questions = [
+ "what is litellm?",
+ "why should I use LiteLLM",
+ "does litellm support Anthropic LLMs",
+ "write code to make a litellm completion call",
+]
+````
+
+## Running questions
+
+### Select from 100+ LLMs here: https://docs.litellm.ai/docs/providers {#select-from-100-llms-here-httpsdocslitellmaidocsproviders}
``` python
+import litellm
+from litellm import completion, completion_cost
import os
+import time
+
+# optional use litellm dashboard to view logs
+# litellm.use_client = True
+# litellm.token = "ishaan_2@berri.ai" # set your email
+
+
+# set API keys
+os.environ['TOGETHERAI_API_KEY'] = ""
os.environ['OPENAI_API_KEY'] = ""
os.environ['ANTHROPIC_API_KEY'] = ""
+
+
+# select LLMs to benchmark
+# using https://api.together.xyz/playground for llama2
+# try any supported LLM here: https://docs.litellm.ai/docs/providers
+
+models = ['togethercomputer/llama-2-70b-chat', 'gpt-3.5-turbo', 'claude-instant-1.2']
+data = []
+
+for question in questions: # group by question
+ for model in models:
+ print(f"running question: {question} for model: {model}")
+ start_time = time.time()
+ # show response, response time, cost for each question
+ response = completion(
+ model=model,
+ max_tokens=500,
+ messages = [
+ {
+ "role": "system", "content": system_prompt
+ },
+ {
+ "role": "user", "content": question
+ }
+ ],
+ )
+ end = time.time()
+ total_time = end-start_time # response time
+ # print(response)
+ cost = completion_cost(response) # cost for completion
+ raw_response = response['choices'][0]['message']['content'] # response string
+
+
+ # add log to pandas df
+ data.append(
+ {
+ 'Model': model,
+ 'Question': question,
+ 'Response': raw_response,
+ 'ResponseTime': total_time,
+ 'Cost': cost
+ })
```
-
-
-
-
-
-
-
-
-## Calling gpt-3.5-turbo and claude-2 on the same questions
-
-## LiteLLM `completion()` allows you to call all LLMs in the same format
-
-
-
-
-
+## View Benchmarks for LLMs
``` python
-results = [] # for storing results
-
-models = ['gpt-3.5-turbo', 'claude-2'] # define what models you're testing, see: https://docs.litellm.ai/docs/completion/supported
-for question in questions:
- row = [question]
- for model in models:
- print("Calling:", model, "question:", question)
- response = completion( # using litellm.completion
- model=model,
- messages=[
- {'role': 'system', 'content': prompt},
- {'role': 'user', 'content': question}
- ]
- )
- answer = response.choices[0].message['content']
- row.append(answer)
- print(print("Calling:", model, "answer:", answer))
-
- results.append(row) # save results
-
-```
-
-
-
-
-
-## Visualizing Results
-
-
-
-
-
-``` python
-# Create a table to visualize results
+from IPython.display import display
+from IPython.core.interactiveshell import InteractiveShell
+InteractiveShell.ast_node_interactivity = "all"
+from IPython.display import HTML
import pandas as pd
-columns = ['Question'] + models
-df = pd.DataFrame(results, columns=columns)
+df = pd.DataFrame(data)
+grouped_by_question = df.groupby('Question')
-df
+for question, group_data in grouped_by_question:
+ print(f"Question: {question}")
+ HTML(group_data.to_html())
```
-## Output Table
-
-
+## Use Case 2 - Rewrite user input concisely
+
+``` python
+# enter your system prompt if you have one
+system_prompt = """
+For a given user input, rewrite the input to make be more concise.
+"""
+
+# user input for re-writing questions
+questions = [
+ "LiteLLM is a lightweight Python package that simplifies the process of making API calls to various language models. Here are some reasons why you should use LiteLLM:nn1. **Simplified API Calls**: LiteLLM abstracts away the complexity of making API calls to different language models. It provides a unified interface for invoking models from OpenAI, Azure, Cohere, Anthropic, Huggingface, and more.nn2. **Easy Integration**: LiteLLM seamlessly integrates with your existing codebase. You can import the package and start making API calls with just a few lines of code.nn3. **Flexibility**: LiteLLM supports a variety of language models, including GPT-3, GPT-Neo, chatGPT, and more. You can choose the model that suits your requirements and easily switch between them.nn4. **Convenience**: LiteLLM handles the authentication and connection details for you. You just need to set the relevant environment variables, and the package takes care of the rest.nn5. **Quick Prototyping**: LiteLLM is ideal for rapid prototyping and experimentation. With its simple API, you can quickly generate text, chat with models, and build interactive applications.nn6. **Community Support**: LiteLLM is actively maintained and supported by a community of developers. You can find help, share ideas, and collaborate with others to enhance your projects.nnOverall, LiteLLM simplifies the process of making API calls to language models, saving you time and effort while providing flexibility and convenience",
+ "Hi everyone! I'm [your name] and I'm currently working on [your project/role involving LLMs]. I came across LiteLLM and was really excited by how it simplifies working with different LLM providers. I'm hoping to use LiteLLM to [build an app/simplify my code/test different models etc]. Before finding LiteLLM, I was struggling with [describe any issues you faced working with multiple LLMs]. With LiteLLM's unified API and automatic translation between providers, I think it will really help me to [goals you have for using LiteLLM]. Looking forward to being part of this community and learning more about how I can build impactful applications powered by LLMs!Let me know if you would like me to modify or expand on any part of this suggested intro. I'm happy to provide any clarification or additional details you need!",
+ "Traceloop is a platform for monitoring and debugging the quality of your LLM outputs. It provides you with a way to track the performance of your LLM application; rollout changes with confidence; and debug issues in production. It is based on OpenTelemetry, so it can provide full visibility to your LLM requests, as well vector DB usage, and other infra in your stack."
+]
+```
+
+## Run Questions
+
+``` python
+import litellm
+from litellm import completion, completion_cost
+import os
+import time
+
+# optional use litellm dashboard to view logs
+# litellm.use_client = True
+# litellm.token = "ishaan_2@berri.ai" # set your email
+
+os.environ['TOGETHERAI_API_KEY'] = ""
+os.environ['OPENAI_API_KEY'] = ""
+os.environ['ANTHROPIC_API_KEY'] = ""
+
+models = ['togethercomputer/llama-2-70b-chat', 'gpt-3.5-turbo', 'claude-instant-1.2'] # enter llms to benchmark
+data_2 = []
+
+for question in questions: # group by question
+ for model in models:
+ print(f"running question: {question} for model: {model}")
+ start_time = time.time()
+ # show response, response time, cost for each question
+ response = completion(
+ model=model,
+ max_tokens=500,
+ messages = [
+ {
+ "role": "system", "content": system_prompt
+ },
+ {
+ "role": "user", "content": "User input:" + question
+ }
+ ],
+ )
+ end = time.time()
+ total_time = end-start_time # response time
+ # print(response)
+ cost = completion_cost(response) # cost for completion
+ raw_response = response['choices'][0]['message']['content'] # response string
+ #print(raw_response, total_time, cost)
+
+ # add to pandas df
+ data_2.append(
+ {
+ 'Model': model,
+ 'Question': question,
+ 'Response': raw_response,
+ 'ResponseTime': total_time,
+ 'Cost': cost
+ })
+
+
+```
+## View Logs - Group by Question
+``` python
+from IPython.display import display
+from IPython.core.interactiveshell import InteractiveShell
+InteractiveShell.ast_node_interactivity = "all"
+from IPython.display import HTML
+import pandas as pd
+
+df = pd.DataFrame(data_2)
+grouped_by_question = df.groupby('Question')
+
+for question, group_data in grouped_by_question:
+ print(f"Question: {question}")
+ HTML(group_data.to_html())
+```
+
+#### User Question
+ Question: Hi everyone! I'm [your name] and I'm currently working on [your project/role involving LLMs]. I came across LiteLLM and was really excited by how it simplifies working with different LLM providers. I'm hoping to use LiteLLM to [build an app/simplify my code/test different models etc]. Before finding LiteLLM, I was struggling with [describe any issues you faced working with multiple LLMs]. With LiteLLM's unified API and automatic translation between providers, I think it will really help me to [goals you have for using LiteLLM]. Looking forward to being part of this community and learning more about how I can build impactful applications powered by LLMs!Let me know if you would like me to modify or expand on any part of this suggested intro. I'm happy to provide any clarification or additional details you need!
+#### Logs
+
+
+
+ |
+ Model |
+ Response |
+ ResponseTime |
+ Cost |
+
+
+
+
+ 3 |
+ togethercomputer/llama-2-70b-chat |
+ nHere's a more concise version of the user input:nn"Hi everyone! I'm [your name] and I'm working on [your project/role involving LLMs]. I recently discovered LiteLLM and I'm excited to use it to [build an app/simplify my code/test different models etc]. Before LiteLLM, I struggled with [describe any issues you faced working with multiple LLMs]. I'm looking forward to using LiteLLM's unified API and automatic translation to achieve my goals. I'm eager to learn more about building impactful applications powered by LLMs and to be part of this community. Let me know if you have any questions or need further clarification."nnIn this revised version, we've kept the essential information and removed some of the extraneous language. We've also rephrased some of the sentences to make them more concise and easier to read. |
+ 18.300620 |
+ 0.001200 |
+
+
+ 4 |
+ gpt-3.5-turbo |
+ User input: Hi, I'm [your name] and I'm excited about using LiteLLM to simplify working with different LLM providers. Before finding LiteLLM, I faced challenges working with multiple LLMs. With LiteLLM's unified API and automatic translation, I believe it will help me achieve my goals of [state your goals]. I look forward to being part of this community and learning how to build impactful applications with LLMs. Let me know if you need any further clarification or details. |
+ 7.385472 |
+ 0.000525 |
+
+
+ 5 |
+ claude-instant-1.2 |
+ Here is a more concise rewrite of the user input:nnHi everyone, I'm [your name]. I'm currently [your project/role] and came across LiteLLM, which simplifies working with different LLMs through its unified API. I hope to [build an app/simplify code/test models] with LiteLLM since I previously struggled with [issues]. LiteLLM's automatic translation between providers will help me [goals] and build impactful LLM applications. Looking forward to learning more as part of this community. Let me know if you need any clarification on my plans to use LiteLLM. |
+ 8.628217 |
+ 0.001022 |
+
+
+
diff --git a/docs/my-website/docs/tutorials/compare_llms_2.md b/docs/my-website/docs/tutorials/compare_llms_2.md
new file mode 100644
index 000000000..c4c73b174
--- /dev/null
+++ b/docs/my-website/docs/tutorials/compare_llms_2.md
@@ -0,0 +1,123 @@
+import Image from '@theme/IdealImage';
+
+# Comparing LLMs on a Test Set using LiteLLM
+
+
+
+
+LiteLLM allows you to use any LLM as a drop in replacement for
+`gpt-3.5-turbo`
+
+This notebook walks through how you can compare GPT-4 vs Claude-2 on a
+given test set using litellm
+
+## Output at the end of this tutorial:
+
+
+
+
+
+
+
+``` python
+!pip install litellm
+```
+
+
+
+
+
+``` python
+from litellm import completion
+import litellm
+
+# init your test set questions
+questions = [
+ "how do i call completion() using LiteLLM",
+ "does LiteLLM support VertexAI",
+ "how do I set my keys on replicate llama2?",
+]
+
+
+# set your prompt
+prompt = """
+You are a coding assistant helping users using litellm.
+litellm is a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints. It manages:
+
+"""
+```
+
+
+
+
+
+``` python
+import os
+os.environ['OPENAI_API_KEY'] = ""
+os.environ['ANTHROPIC_API_KEY'] = ""
+```
+
+
+
+
+
+
+
+
+
+## Calling gpt-3.5-turbo and claude-2 on the same questions
+
+## LiteLLM `completion()` allows you to call all LLMs in the same format
+
+
+
+
+
+``` python
+results = [] # for storing results
+
+models = ['gpt-3.5-turbo', 'claude-2'] # define what models you're testing, see: https://docs.litellm.ai/docs/completion/supported
+for question in questions:
+ row = [question]
+ for model in models:
+ print("Calling:", model, "question:", question)
+ response = completion( # using litellm.completion
+ model=model,
+ messages=[
+ {'role': 'system', 'content': prompt},
+ {'role': 'user', 'content': question}
+ ]
+ )
+ answer = response.choices[0].message['content']
+ row.append(answer)
+        print("Calling:", model, "answer:", answer)
+
+ results.append(row) # save results
+
+```
+
+
+
+
+
+## Visualizing Results
+
+
+
+
+
+``` python
+# Create a table to visualize results
+import pandas as pd
+
+columns = ['Question'] + models
+df = pd.DataFrame(results, columns=columns)
+
+df
+```
+## Output Table
+
+
+