diff --git a/docs/my-website/docs/files_endpoints.md b/docs/my-website/docs/files_endpoints.md
index 3c53052d35..31a02d41a3 100644
--- a/docs/my-website/docs/files_endpoints.md
+++ b/docs/my-website/docs/files_endpoints.md
@@ -2,10 +2,12 @@ import TabItem from '@theme/TabItem';
 import Tabs from '@theme/Tabs';
 
-# /files
+# Provider Files Endpoints
 
 Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.
 
+Use this to call the provider's `/files` endpoints directly, in the OpenAI format.
+
 ## Quick Start - Upload a File
@@ -19,7 +21,7 @@ Files are used to upload documents that can be used with features like Assistant
 
-### 1. Setup config.yaml
+1. Setup config.yaml
 
 ```
 # for /files endpoints
 files_settings:
@@ -32,7 +34,7 @@ files_settings:
     api_key: os.environ/OPENAI_API_KEY
 ```
 
-### 2. Start LiteLLM PROXY Server
+2. Start LiteLLM PROXY Server
 
 ```bash
 litellm --config /path/to/config.yaml
@@ -40,7 +42,7 @@ litellm --config /path/to/config.yaml
 ## RUNNING on http://0.0.0.0:4000
 ```
 
-### 3. Use OpenAI's /files endpoints
+3. Use OpenAI's /files endpoints
 
 Upload a File
diff --git a/docs/my-website/docs/proxy/litellm_managed_files.md b/docs/my-website/docs/proxy/litellm_managed_files.md
new file mode 100644
index 0000000000..b69430dbaf
--- /dev/null
+++ b/docs/my-website/docs/proxy/litellm_managed_files.md
@@ -0,0 +1,279 @@
import TabItem from '@theme/TabItem';
import Tabs from '@theme/Tabs';
import Image from '@theme/IdealImage';

# [BETA] LiteLLM Managed Files

Use this to reuse the same 'file id' across different providers.

| Feature | Supported | Comments |
| --- | --- | --- |
| Proxy | ✅ | |
| SDK | ❌ | Requires a Postgres DB for storing file ids |
| Available across all providers | ✅ | |

Limitations of LiteLLM Managed Files:
- Only works for `/chat/completions` requests.
- Assumes only one model is configured per model_name.

Follow [here](https://github.com/BerriAI/litellm/discussions/9632) for updates on multiple-model and batches support.

### 1. Setup config.yaml

```yaml
model_list:
  - model_name: "gemini-2.0-flash"
    litellm_params:
      model: vertex_ai/gemini-2.0-flash
      vertex_project: my-project-id
      vertex_location: us-central1
  - model_name: "gpt-4o-mini-openai"
    litellm_params:
      model: gpt-4o-mini
      api_key: os.environ/OPENAI_API_KEY
```

### 2. Start proxy

```bash
litellm --config /path/to/config.yaml
```

### 3. Test it!

Specify `target_model_names` to use the same file id across different providers. This is the list of model names set via config.yaml (or 'public_model_names' on the UI).

```python
target_model_names="gpt-4o-mini-openai, gemini-2.0-flash" # 👈 Specify model_names
```

Check `/v1/models` to see the list of available model names for a key.
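For example, you can list the models available to your key (a sketch, assuming the proxy from step 2 is running locally and `sk-1234` is a valid virtual key, as in the examples below):

```python
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

# /v1/models returns the model names this key is allowed to use
for model in client.models.list():
    print(model.id)
```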
+ +#### **Store a PDF file** + +```python +from openai import OpenAI + +client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234", max_retries=0) + + +# Download and save the PDF locally +url = ( + "https://storage.googleapis.com/cloud-samples-data/generative-ai/pdf/2403.05530.pdf" +) +response = requests.get(url) +response.raise_for_status() + +# Save the PDF locally +with open("2403.05530.pdf", "wb") as f: + f.write(response.content) + +file = client.files.create( + file=open("2403.05530.pdf", "rb"), + purpose="user_data", # can be any openai 'purpose' value + extra_body={"target_model_names": "gpt-4o-mini-openai, gemini-2.0-flash"}, # 👈 Specify model_names +) + +print(f"file id={file.id}") +``` + +#### **Use the same file id across different providers** + + + + +```python +completion = client.chat.completions.create( + model="gpt-4o-mini-openai", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "What is in this recording?"}, + { + "type": "file", + "file": { + "file_id": file.id, + }, + }, + ], + }, + ] +) + +print(completion.choices[0].message) +``` + + + + + +```python +completion = client.chat.completions.create( + model="gemini-2.0-flash", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "What is in this recording?"}, + { + "type": "file", + "file": { + "file_id": file.id, + }, + }, + ], + }, + ] +) + +print(completion.choices[0].message) + +``` + + + + +### Complete Example + +```python +import base64 +import requests +from openai import OpenAI + +client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234", max_retries=0) + + +# Download and save the PDF locally +url = ( + "https://storage.googleapis.com/cloud-samples-data/generative-ai/pdf/2403.05530.pdf" +) +response = requests.get(url) +response.raise_for_status() + +# Save the PDF locally +with open("2403.05530.pdf", "wb") as f: + f.write(response.content) + +# Read the local PDF file +file = client.files.create( + file=open("2403.05530.pdf", "rb"), + purpose="user_data", # can be any openai 'purpose' value + extra_body={"target_model_names": "gpt-4o-mini-openai, vertex_ai/gemini-2.0-flash"}, +) + +print(f"file.id: {file.id}") # 👈 Unified file id + +## GEMINI CALL ### +completion = client.chat.completions.create( + model="gemini-2.0-flash", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "What is in this recording?"}, + { + "type": "file", + "file": { + "file_id": file.id, + }, + }, + ], + }, + ] +) + +print(completion.choices[0].message) + + +### OPENAI CALL ### +completion = client.chat.completions.create( + model="gpt-4o-mini-openai", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "What is in this recording?"}, + { + "type": "file", + "file": { + "file_id": file.id, + }, + }, + ], + }, + ], +) + +print(completion.choices[0].message) + +``` + + +### Supported Endpoints + +#### Create a file - `/files` + +```python +from openai import OpenAI + +client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234", max_retries=0) + +# Download and save the PDF locally +url = ( + "https://storage.googleapis.com/cloud-samples-data/generative-ai/pdf/2403.05530.pdf" +) +response = requests.get(url) +response.raise_for_status() + +# Save the PDF locally +with open("2403.05530.pdf", "wb") as f: + f.write(response.content) + +# Read the local PDF file +file = client.files.create( + file=open("2403.05530.pdf", "rb"), + purpose="user_data", # can be any openai 'purpose' value + extra_body={"target_model_names": 
"gpt-4o-mini-openai, vertex_ai/gemini-2.0-flash"}, +) +``` + +#### Retrieve a file - `/files/{file_id}` + +```python +client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234", max_retries=0) + +file = client.files.retrieve(file_id=file.id) +``` + +#### Delete a file - `/files/{file_id}/delete` + +```python +client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234", max_retries=0) + +file = client.files.delete(file_id=file.id) +``` + +### FAQ + +**1. Does LiteLLM store the file?** + +No, LiteLLM does not store the file. It only stores the file id's in the postgres DB. + +**2. How does LiteLLM know which file to use for a given file id?** + +LiteLLM stores a mapping of the litellm file id to the model-specific file id in the postgres DB. When a request comes in, LiteLLM looks up the model-specific file id and uses it in the request to the provider. + +**3. How do file deletions work?** + +When a file is deleted, LiteLLM deletes the mapping from the postgres DB, and the files on each provider. + +### Architecture + + + + + + \ No newline at end of file diff --git a/docs/my-website/img/managed_files_arch.png b/docs/my-website/img/managed_files_arch.png new file mode 100644 index 0000000000..e49c47334d Binary files /dev/null and b/docs/my-website/img/managed_files_arch.png differ diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 80c5346e27..9057f9ac88 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -340,7 +340,15 @@ const sidebars = { }, "rerank", "assistants", - "files_endpoints", + + { + type: "category", + label: "/files", + items: [ + "files_endpoints", + "proxy/litellm_managed_files", + ], + }, "batches", "realtime", "fine_tuning",