forked from phoenix/litellm-mirror
Merge branch 'BerriAI:main' into main
commit b89b3d8c44
102 changed files with 8852 additions and 6557 deletions
@@ -150,4 +150,20 @@ response = image_generation(
    model="bedrock/stability.stable-diffusion-xl-v0",
)
print(f"response: {response}")
```

## VertexAI - Image Generation Models

### Usage

Use this for image generation models on VertexAI.

```python
response = litellm.image_generation(
    prompt="An olympic size swimming pool",
    model="vertex_ai/imagegeneration@006",
    vertex_ai_project="adroit-crow-413218",
    vertex_ai_location="us-central1",
)
print(f"response: {response}")
```
173  docs/my-website/docs/observability/lago.md  Normal file
@@ -0,0 +1,173 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Lago - Usage Based Billing

[Lago](https://www.getlago.com/) offers a self-hosted and cloud metering and usage-based billing solution.

<Image img={require('../../img/lago.jpeg')} />

## Quick Start
Use just 1 line of code to instantly log your responses **across all providers** with Lago.

Get your Lago [API Key](https://docs.getlago.com/guide/self-hosted/docker#find-your-api-key)

```python
litellm.callbacks = ["lago"] # logs cost + usage of successful calls to lago
```

<Tabs>
<TabItem value="sdk" label="SDK">

```python
# pip install lago
import litellm
import os

os.environ["LAGO_API_BASE"] = "" # http://0.0.0.0:3000
os.environ["LAGO_API_KEY"] = ""
os.environ["LAGO_API_EVENT_CODE"] = "" # The billable metric's code - https://docs.getlago.com/guide/events/ingesting-usage#define-a-billable-metric

# LLM API Keys
os.environ['OPENAI_API_KEY']=""

# set lago as a callback, litellm will send the data to lago
litellm.success_callback = ["lago"]

# openai call
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": "Hi 👋 - i'm openai"}
    ],
    user="your_customer_id" # 👈 SET YOUR CUSTOMER ID HERE
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

1. Add to Config.yaml
```yaml
model_list:
  - litellm_params:
      api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
      api_key: my-fake-key
      model: openai/my-fake-model
    model_name: fake-openai-endpoint

litellm_settings:
  callbacks: ["lago"] # 👈 KEY CHANGE
```

2. Start Proxy

```
litellm --config /path/to/config.yaml
```

3. Test it!

<Tabs>
<TabItem value="curl" label="Curl">

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data ' {
  "model": "fake-openai-endpoint",
  "messages": [
    {
      "role": "user",
      "content": "what llm are you"
    }
  ],
  "user": "your-customer-id" # 👈 SET YOUR CUSTOMER ID
}
'
```
</TabItem>
<TabItem value="openai_python" label="OpenAI Python SDK">

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
    {
        "role": "user",
        "content": "this is a test request, write a short poem"
    }
], user="my_customer_id") # 👈 whatever your customer id is

print(response)
```
</TabItem>
<TabItem value="langchain" label="Langchain">

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
import os

os.environ["OPENAI_API_KEY"] = "anything"

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model = "gpt-3.5-turbo",
    temperature=0.1,
    extra_body={
        "user": "my_customer_id" # 👈 whatever your customer id is
    }
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that im using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)

print(response)
```
</TabItem>
</Tabs>
</TabItem>
</Tabs>


<Image img={require('../../img/lago_2.png')} />

## Advanced - Lago Logging object

This is what LiteLLM will log to Lago:

```
{
  "event": {
    "transaction_id": "<generated_unique_id>",
    "external_customer_id": <litellm_end_user_id>, # passed via `user` param in /chat/completion call - https://platform.openai.com/docs/api-reference/chat/create
    "code": os.getenv("LAGO_API_EVENT_CODE"),
    "properties": {
      "input_tokens": <number>,
      "output_tokens": <number>,
      "model": <string>,
      "response_cost": <number>, # 👈 LITELLM CALCULATED RESPONSE COST - https://github.com/BerriAI/litellm/blob/d43f75150a65f91f60dc2c0c9462ce3ffc713c1f/litellm/utils.py#L1473
    }
  }
}
```
@@ -71,6 +71,23 @@ response = litellm.completion(
)
print(response)
```

### Make LiteLLM Proxy use Custom `LANGSMITH_BASE_URL`

If you're using a custom LangSmith instance, you can set the `LANGSMITH_BASE_URL` environment variable to point to your instance.
For example, you can make LiteLLM Proxy log to a local LangSmith instance with this config:

```yaml
litellm_settings:
  success_callback: ["langsmith"]

environment_variables:
  LANGSMITH_BASE_URL: "http://localhost:1984"
  LANGSMITH_PROJECT: "litellm-proxy"
```

## Support & Talk to Founders

- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
@@ -20,7 +20,7 @@ Use just 2 lines of code, to instantly log your responses **across all providers
Get your OpenMeter API Key from https://openmeter.cloud/meters

```python
litellm.success_callback = ["openmeter"] # logs cost + usage of successful calls to openmeter
litellm.callbacks = ["openmeter"] # logs cost + usage of successful calls to openmeter
```
@@ -28,7 +28,7 @@ litellm.success_callback = ["openmeter"] # logs cost + usage of successful calls
<TabItem value="sdk" label="SDK">

```python
# pip install langfuse
# pip install openmeter
import litellm
import os
@@ -39,8 +39,8 @@ os.environ["OPENMETER_API_KEY"] = ""
# LLM API Keys
os.environ['OPENAI_API_KEY']=""

# set langfuse as a callback, litellm will send the data to langfuse
litellm.success_callback = ["openmeter"]
# set openmeter as a callback, litellm will send the data to openmeter
litellm.callbacks = ["openmeter"]

# openai call
response = litellm.completion(
@@ -64,7 +64,7 @@ model_list:
  model_name: fake-openai-endpoint

litellm_settings:
  success_callback: ["openmeter"] # 👈 KEY CHANGE
  callbacks: ["openmeter"] # 👈 KEY CHANGE
```

2. Start Proxy
@@ -223,6 +223,32 @@ assert isinstance(

```

### Setting `anthropic-beta` Header in Requests

Pass the `extra_headers` param to litellm. All headers will be forwarded to the Anthropic API.

```python
response = completion(
    model="anthropic/claude-3-opus-20240229",
    messages=messages,
    tools=tools,
    extra_headers={"anthropic-beta": "<beta-feature-flag>"},  # placeholder: set whichever beta flag you need forwarded
)
```
### Forcing Anthropic Tool Use

If you want Claude to use a specific tool to answer the user’s question, you can do so by specifying the tool in the `tool_choice` field like so:

```python
response = completion(
    model="anthropic/claude-3-opus-20240229",
    messages=messages,
    tools=tools,
    tool_choice={"type": "tool", "name": "get_weather"},
)
```
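The snippets above assume a `tools` list is already defined. A minimal sketch of such a list in the OpenAI tool format (the `get_weather` function here is a hypothetical example, not part of the docs above):

```python
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a given city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string", "description": "City name, e.g. San Francisco"}
                },
                "required": ["city"],
            },
        },
    }
]
```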

### Parallel Function Calling
@@ -101,13 +101,19 @@ Ollama supported models: https://github.com/ollama/ollama

| Model Name | Function Call |
|----------------------|----------------------------------------------------------------------------------|
| Mistral | `completion(model='ollama/mistral', messages, api_base="http://localhost:11434", stream=True)` |
| Mistral-7B-Instruct-v0.1 | `completion(model='ollama/mistral-7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
| Mistral-7B-Instruct-v0.2 | `completion(model='ollama/mistral-7B-Instruct-v0.2', messages, api_base="http://localhost:11434", stream=False)` |
| Mixtral-8x7B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
| Mixtral-8x22B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x22B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
| Llama2 7B | `completion(model='ollama/llama2', messages, api_base="http://localhost:11434", stream=True)` |
| Llama2 13B | `completion(model='ollama/llama2:13b', messages, api_base="http://localhost:11434", stream=True)` |
| Llama2 70B | `completion(model='ollama/llama2:70b', messages, api_base="http://localhost:11434", stream=True)` |
| Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
| Code Llama | `completion(model='ollama/codellama', messages, api_base="http://localhost:11434", stream=True)` |
| Meta LLaMa3 8B | `completion(model='ollama/llama3', messages, api_base="http://localhost:11434", stream=False)` |
| Meta LLaMa3 70B | `completion(model='ollama/llama3:70b', messages, api_base="http://localhost:11434", stream=False)` |
| Orca Mini | `completion(model='ollama/orca-mini', messages, api_base="http://localhost:11434", stream=True)` |
| Vicuna | `completion(model='ollama/vicuna', messages, api_base="http://localhost:11434", stream=True)` |
| Nous-Hermes | `completion(model='ollama/nous-hermes', messages, api_base="http://localhost:11434", stream=True)` |
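For reference, the calls in the table map directly onto litellm's `completion` function. A minimal, self-contained sketch (assuming an Ollama server is running locally with the `llama3` model pulled):

```python
from litellm import completion

# stream a response from a locally running Ollama server
response = completion(
    model="ollama/llama3",
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
    api_base="http://localhost:11434",  # default Ollama endpoint
    stream=True,
)

for chunk in response:
    print(chunk)  # each chunk is an OpenAI-style streaming delta
```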
@@ -188,6 +188,7 @@ These also support the `OPENAI_API_BASE` environment variable, which can be used
## OpenAI Vision Models
| Model Name | Function Call |
|-----------------------|-----------------------------------------------------------------|
| gpt-4o | `response = completion(model="gpt-4o", messages=messages)` |
| gpt-4-turbo | `response = completion(model="gpt-4-turbo", messages=messages)` |
| gpt-4-vision-preview | `response = completion(model="gpt-4-vision-preview", messages=messages)` |
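For these models, `messages` can carry image content in the standard OpenAI format. A minimal sketch (the image URL is a placeholder):

```python
from litellm import completion

response = completion(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}},
            ],
        }
    ],
)
print(response.choices[0].message.content)
```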
@@ -508,6 +508,31 @@ All models listed [here](https://github.com/BerriAI/litellm/blob/57f37f743886a02
| text-embedding-preview-0409 | `embedding(model="vertex_ai/text-embedding-preview-0409", input)` |
| text-multilingual-embedding-preview-0409 | `embedding(model="vertex_ai/text-multilingual-embedding-preview-0409", input)` |

## Image Generation Models

Usage

```python
response = await litellm.aimage_generation(
    prompt="An olympic size swimming pool",
    model="vertex_ai/imagegeneration@006",
    vertex_ai_project="adroit-crow-413218",
    vertex_ai_location="us-central1",
)
```

**Generating multiple images**

Use the `n` parameter to pass how many images you want generated
```python
response = await litellm.aimage_generation(
    prompt="An olympic size swimming pool",
    model="vertex_ai/imagegeneration@006",
    vertex_ai_project="adroit-crow-413218",
    vertex_ai_location="us-central1",
    n=1,
)
```

## Extra
@@ -1,4 +1,4 @@
# 🚨 Alerting
# 🚨 Alerting / Webhooks

Get alerts for:
@@ -11,7 +11,7 @@ Get alerts for:
- Daily Reports:
    - **LLM** Top 5 slowest deployments
    - **LLM** Top 5 deployments with most failed requests
- **Spend** Weekly & Monthly spend per Team, Tag


## Quick Start
@@ -61,10 +61,38 @@ curl -X GET 'http://localhost:4000/health/services?service=slack' \
-H 'Authorization: Bearer sk-1234'
```

## Advanced - Opting into specific alert types

## Extras
Set `alert_types` if you want to opt into only specific alert types.

### Using Discord Webhooks
```shell
general_settings:
  alerting: ["slack"]
  alert_types: ["spend_reports"]
```

All Possible Alert Types

```python
alert_types: Optional[
    List[
        Literal[
            "llm_exceptions",
            "llm_too_slow",
            "llm_requests_hanging",
            "budget_alerts",
            "db_exceptions",
            "daily_reports",
            "spend_reports",
            "cooldown_deployment",
            "new_model_added",
        ]
    ]
]
```


## Advanced - Using Discord Webhooks

Discord provides a slack compatible webhook url that you can use for alerting
@@ -96,3 +124,80 @@ environment_variables:
```

That's it! You're ready to go!

## Advanced - [BETA] Webhooks for Budget Alerts

**Note**: This is a beta feature, so the spec might change.

Set a webhook to get notified for budget alerts.

1. Setup config.yaml

Add the url to your environment. For testing, you can use a link from [here](https://webhook.site/)

```bash
export WEBHOOK_URL="https://webhook.site/6ab090e8-c55f-4a23-b075-3209f5c57906"
```

Add 'webhook' to config.yaml
```yaml
general_settings:
  alerting: ["webhook"] # 👈 KEY CHANGE
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```

3. Test it!

```bash
curl -X GET --location 'http://0.0.0.0:4000/health/services?service=webhook' \
--header 'Authorization: Bearer sk-1234'
```

**Expected Response**

```bash
{
  "spend": 1, # the spend for the 'event_group'
  "max_budget": 0, # the 'max_budget' set for the 'event_group'
  "token": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
  "user_id": "default_user_id",
  "team_id": null,
  "user_email": null,
  "key_alias": null,
  "projected_exceeded_date": null,
  "projected_spend": null,
  "event": "budget_crossed", # Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"]
  "event_group": "user",
  "event_message": "User Budget: Budget Crossed"
}
```

**API Spec for Webhook Event** (a typed sketch follows the field list)

- `spend` *float*: The current spend amount for the 'event_group'.
- `max_budget` *float*: The maximum allowed budget for the 'event_group'.
- `token` *str*: A hashed value of the key, used for authentication or identification purposes.
- `user_id` *str or null*: The ID of the user associated with the event (optional).
- `team_id` *str or null*: The ID of the team associated with the event (optional).
- `user_email` *str or null*: The email of the user associated with the event (optional).
- `key_alias` *str or null*: An alias for the key associated with the event (optional).
- `projected_exceeded_date` *str or null*: The date when the budget is projected to be exceeded, returned when 'soft_budget' is set for a key (optional).
- `projected_spend` *float or null*: The projected spend amount, returned when 'soft_budget' is set for a key (optional).
- `event` *Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"]*: The type of event that triggered the webhook. Possible values are:
    * "budget_crossed": Indicates that the spend has exceeded the max budget.
    * "threshold_crossed": Indicates that spend has crossed a threshold (currently sent when 85% and 95% of the budget is reached).
    * "projected_limit_exceeded": For "key" only - Indicates that the projected spend is expected to exceed the soft budget threshold.
- `event_group` *Literal["user", "key", "team", "proxy"]*: The group associated with the event. Possible values are:
    * "user": The event is related to a specific user.
    * "key": The event is related to a specific key.
    * "team": The event is related to a team.
    * "proxy": The event is related to the proxy.
- `event_message` *str*: A human-readable description of the event.
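For readers consuming this webhook programmatically, the field list above maps onto a small typed model. A minimal sketch (the class name is illustrative, not part of LiteLLM's API):

```python
from typing import Literal, Optional, TypedDict


class BudgetWebhookEvent(TypedDict):
    """Illustrative shape of the budget-alert webhook payload described above."""

    spend: float
    max_budget: float
    token: str                              # hashed key
    user_id: Optional[str]
    team_id: Optional[str]
    user_email: Optional[str]
    key_alias: Optional[str]
    projected_exceeded_date: Optional[str]  # set when 'soft_budget' is configured for the key
    projected_spend: Optional[float]
    event: Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"]
    event_group: Literal["user", "key", "team", "proxy"]
    event_message: str
```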
319  docs/my-website/docs/proxy/billing.md  Normal file
@@ -0,0 +1,319 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# 💵 Billing

Bill internal teams and external customers for their usage.

**🚨 Requirements**
- [Setup Lago](https://docs.getlago.com/guide/self-hosted/docker#run-the-app), for usage-based billing. We recommend following [their Stripe tutorial](https://docs.getlago.com/templates/per-transaction/stripe#step-1-create-billable-metrics-for-transaction)

Steps:
- Connect the proxy to Lago
- Set the id you want to bill for (customers, internal users, teams)
- Start!

## Quick Start

Bill internal teams for their usage

### 1. Connect proxy to Lago

Set 'lago' as a callback on your proxy config.yaml

```yaml
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  callbacks: ["lago"] # 👈 KEY CHANGE

general_settings:
  master_key: sk-1234
```

Add your Lago keys to the environment

```bash
export LAGO_API_BASE="http://localhost:3000" # self-host - https://docs.getlago.com/guide/self-hosted/docker#run-the-app
export LAGO_API_KEY="3e29d607-de54-49aa-a019-ecf585729070" # Get key - https://docs.getlago.com/guide/self-hosted/docker#find-your-api-key
export LAGO_API_EVENT_CODE="openai_tokens" # name of lago billing code
export LAGO_API_CHARGE_BY="team_id" # 👈 Charges 'team_id' attached to proxy key
```

Start proxy

```bash
litellm --config /path/to/config.yaml
```
### 2. Create Key for Internal Team

```bash
curl 'http://0.0.0.0:4000/key/generate' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data-raw '{"team_id": "my-unique-id"}' # 👈 Internal Team's ID
```

Response Object:

```bash
{
    "key": "sk-tXL0wt5-lOOVK9sfY2UacA"
}
```


### 3. Start billing!

<Tabs>
<TabItem value="curl" label="Curl">

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-tXL0wt5-lOOVK9sfY2UacA' \ # 👈 Team's Key
--data ' {
  "model": "fake-openai-endpoint",
  "messages": [
    {
      "role": "user",
      "content": "what llm are you"
    }
  ]
}
'
```
</TabItem>
<TabItem value="openai_python" label="OpenAI Python SDK">

```python
import openai
client = openai.OpenAI(
    api_key="sk-tXL0wt5-lOOVK9sfY2UacA", # 👈 Team's Key
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
    {
        "role": "user",
        "content": "this is a test request, write a short poem"
    }
])

print(response)
```
</TabItem>
<TabItem value="langchain" label="Langchain">

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
import os

os.environ["OPENAI_API_KEY"] = "sk-tXL0wt5-lOOVK9sfY2UacA" # 👈 Team's Key

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model = "gpt-3.5-turbo",
    temperature=0.1,
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that im using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)

print(response)
```
</TabItem>
</Tabs>
**See Results on Lago**

<Image img={require('../../img/lago_2.png')} style={{ width: '500px', height: 'auto' }} />

## Advanced - Lago Logging object

This is what LiteLLM will log to Lago:

```
{
  "event": {
    "transaction_id": "<generated_unique_id>",
    "external_customer_id": <selected_id>, # either 'end_user_id', 'user_id', or 'team_id'. Default 'end_user_id'.
    "code": os.getenv("LAGO_API_EVENT_CODE"),
    "properties": {
      "input_tokens": <number>,
      "output_tokens": <number>,
      "model": <string>,
      "response_cost": <number>, # 👈 LITELLM CALCULATED RESPONSE COST - https://github.com/BerriAI/litellm/blob/d43f75150a65f91f60dc2c0c9462ce3ffc713c1f/litellm/utils.py#L1473
    }
  }
}
```

## Advanced - Bill Customers, Internal Users

For:
- Customers (id passed via 'user' param in /chat/completion call) = 'end_user_id'
- Internal Users (id set when [creating keys](https://docs.litellm.ai/docs/proxy/virtual_keys#advanced---spend-tracking)) = 'user_id'
- Teams (id set when [creating keys](https://docs.litellm.ai/docs/proxy/virtual_keys#advanced---spend-tracking)) = 'team_id'
<Tabs>
<TabItem value="customers" label="Customer Billing">

1. Set 'LAGO_API_CHARGE_BY' to 'end_user_id'

```bash
export LAGO_API_CHARGE_BY="end_user_id"
```

2. Test it!

<Tabs>
<TabItem value="curl" label="Curl">

```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data ' {
  "model": "gpt-3.5-turbo",
  "messages": [
    {
      "role": "user",
      "content": "what llm are you"
    }
  ],
  "user": "my_customer_id" # 👈 whatever your customer id is
}
'
```
</TabItem>
<TabItem value="openai_sdk" label="OpenAI Python SDK">

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
    {
        "role": "user",
        "content": "this is a test request, write a short poem"
    }
], user="my_customer_id") # 👈 whatever your customer id is

print(response)
```

</TabItem>
<TabItem value="langchain" label="Langchain">

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
import os

os.environ["OPENAI_API_KEY"] = "anything"

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model = "gpt-3.5-turbo",
    temperature=0.1,
    extra_body={
        "user": "my_customer_id" # 👈 whatever your customer id is
    }
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that im using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)

print(response)
```

</TabItem>
</Tabs>

</TabItem>
<TabItem value="users" label="Internal User Billing">

1. Set 'LAGO_API_CHARGE_BY' to 'user_id'

```bash
export LAGO_API_CHARGE_BY="user_id"
```

2. Create a key for that user

```bash
curl 'http://0.0.0.0:4000/key/generate' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{"user_id": "my-unique-id"}' # 👈 Internal User's id
```

Response Object:

```bash
{
    "key": "sk-tXL0wt5-lOOVK9sfY2UacA"
}
```

3. Make API Calls with that Key

```python
import openai
client = openai.OpenAI(
    api_key="sk-tXL0wt5-lOOVK9sfY2UacA", # 👈 Generated key
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
    {
        "role": "user",
        "content": "this is a test request, write a short poem"
    }
])

print(response)
```
</TabItem>
</Tabs>
@@ -25,26 +25,45 @@ class MyCustomHandler(CustomLogger): # https://docs.litellm.ai/docs/observabilit
    def __init__(self):
        pass

    #### ASYNC ####

    async def async_log_stream_event(self, kwargs, response_obj, start_time, end_time):
        pass

    async def async_log_pre_api_call(self, model, messages, kwargs):
        pass

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        pass

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        pass

    #### CALL HOOKS - proxy only ####

    async def async_pre_call_hook(self, user_api_key_dict: UserAPIKeyAuth, cache: DualCache, data: dict, call_type: Literal["completion", "embeddings"]):
    async def async_pre_call_hook(self, user_api_key_dict: UserAPIKeyAuth, cache: DualCache, data: dict, call_type: Literal[
        "completion",
        "text_completion",
        "embeddings",
        "image_generation",
        "moderation",
        "audio_transcription",
    ]) -> Optional[Union[dict, str, Exception]]:
        data["model"] = "my-new-model"
        return data

    async def async_post_call_failure_hook(
        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
    ):
        pass

    async def async_post_call_success_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        response,
    ):
        pass

    async def async_moderation_hook( # call made in parallel to llm api call
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        call_type: Literal["completion", "embeddings", "image_generation"],
    ):
        pass

    async def async_post_call_streaming_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        response: str,
    ):
        pass

proxy_handler_instance = MyCustomHandler()
```
@@ -190,4 +209,100 @@ general_settings:

**Result**

<Image img={require('../../img/end_user_enforcement.png')}/>

## Advanced - Return rejected message as response

For chat completions and text completion calls, you can return a rejected message as a user response.

Do this by returning a string. LiteLLM takes care of returning the response in the correct format depending on the endpoint and whether it's streaming/non-streaming.

For non-chat/text completion endpoints, this response is returned as a 400 status code exception.


### 1. Create Custom Handler

```python
from typing import Literal, Optional, Union

from litellm.integrations.custom_logger import CustomLogger
import litellm
from litellm.proxy.proxy_server import UserAPIKeyAuth, DualCache  # types used in the hook signature
from litellm.utils import get_formatted_prompt

# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
class MyCustomHandler(CustomLogger):
    def __init__(self):
        pass

    #### CALL HOOKS - proxy only ####

    async def async_pre_call_hook(self, user_api_key_dict: UserAPIKeyAuth, cache: DualCache, data: dict, call_type: Literal[
        "completion",
        "text_completion",
        "embeddings",
        "image_generation",
        "moderation",
        "audio_transcription",
    ]) -> Optional[Union[dict, str, Exception]]:
        formatted_prompt = get_formatted_prompt(data=data, call_type=call_type)

        if "Hello world" in formatted_prompt:
            return "This is an invalid response"

        return data

proxy_handler_instance = MyCustomHandler()
```

### 2. Update config.yaml

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo

litellm_settings:
  callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
```


### 3. Test it!

```shell
$ litellm --config /path/to/config.yaml
```
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--data ' {
  "model": "gpt-3.5-turbo",
  "messages": [
    {
      "role": "user",
      "content": "Hello world"
    }
  ]
}'
```

**Expected Response**

```
{
  "id": "chatcmpl-d00bbede-2d90-4618-bf7b-11a1c23cf360",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "This is an invalid response.", # 👈 REJECTED RESPONSE
        "role": "assistant"
      }
    }
  ],
  "created": 1716234198,
  "model": null,
  "object": "chat.completion",
  "system_fingerprint": null,
  "usage": {}
}
```
@@ -5,6 +5,8 @@
- debug (prints info logs)
- detailed debug (prints debug logs)

The proxy also supports json logs. [See here](#json-logs)

## `debug`

**via cli**
@@ -31,4 +33,20 @@ $ litellm --detailed_debug

```python
import os

os.environ["LITELLM_LOG"] = "DEBUG"
```

## JSON LOGS

Set `JSON_LOGS="True"` in your env:

```bash
export JSON_LOGS="True"
```

Start proxy

```bash
$ litellm
```

The proxy will now output all logs in JSON format.
@@ -1,7 +1,8 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# ✨ Enterprise Features - Content Mod, SSO
# ✨ Enterprise Features - Content Mod, SSO, Custom Swagger

Features here are behind a commercial license in our `/enterprise` folder. [**See Code**](https://github.com/BerriAI/litellm/tree/main/enterprise)
@@ -20,6 +21,7 @@ Features:
- ✅ Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
- ✅ Don't log/store specific requests to Langfuse, Sentry, etc. (eg confidential LLM requests)
- ✅ Tracking Spend for Custom Tags
- ✅ Custom Branding + Routes on Swagger Docs
@@ -526,4 +528,39 @@ curl -X GET "http://0.0.0.0:4000/spend/tags" \

<!-- ## Tracking Spend per Key

## Tracking Spend per User -->

## Swagger Docs - Custom Routes + Branding

:::info

Requires a LiteLLM Enterprise key to use. Request one [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)

:::

Set LiteLLM Key in your environment

```bash
LITELLM_LICENSE=""
```

### Customize Title + Description

In your environment, set:

```bash
DOCS_TITLE="TotalGPT"
DOCS_DESCRIPTION="Sample Company Description"
```

### Customize Routes

Hide admin routes from users.

In your environment, set:

```bash
DOCS_FILTERED="True" # only shows openai routes to user
```

<Image img={require('../../img/custom_swagger.png')} style={{ width: '900px', height: 'auto' }} />
BIN  docs/my-website/img/custom_swagger.png  Normal file (binary, not shown; 223 KiB)
BIN  docs/my-website/img/lago.jpeg  Normal file (binary, not shown; 344 KiB)
BIN  docs/my-website/img/lago_2.png  Normal file (binary, not shown; 219 KiB)
@@ -41,6 +41,7 @@ const sidebars = {
        "proxy/reliability",
        "proxy/cost_tracking",
        "proxy/users",
        "proxy/billing",
        "proxy/user_keys",
        "proxy/enterprise",
        "proxy/virtual_keys",
|
|||
"observability/custom_callback",
|
||||
"observability/langfuse_integration",
|
||||
"observability/sentry",
|
||||
"observability/lago",
|
||||
"observability/openmeter",
|
||||
"observability/promptlayer_integration",
|
||||
"observability/wandb_integration",
|
||||
|
|