diff --git a/docs/my-website/docs/providers/fireworks_ai.md b/docs/my-website/docs/providers/fireworks_ai.md
index aa35472220..98d7c33ce7 100644
--- a/docs/my-website/docs/providers/fireworks_ai.md
+++ b/docs/my-website/docs/providers/fireworks_ai.md
@@ -190,6 +190,116 @@ print(response)
+## Document Inlining
+
+LiteLLM supports document inlining for Fireworks AI models. This is useful for models that are not vision models but still need to parse documents, images, etc.
+
+LiteLLM will append `#transform=inline` to the `image_url`'s URL if the model is not a vision model. [**See Code**](https://github.com/BerriAI/litellm/blob/1ae9d45798bdaf8450f2dfdec703369f3d2212b7/litellm/llms/fireworks_ai/chat/transformation.py#L114)
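+
+Conceptually, the URL rewrite works like this (a minimal sketch for illustration only — the helper name is hypothetical, not the actual LiteLLM internals):
+
+```python
+def add_transform_inline(image_url: str, model: str) -> str:
+    """Sketch: append the Fireworks document-inlining hint to an image/document URL."""
+    if "vision" in model:
+        # vision models can already parse images/documents, so leave the URL untouched
+        return image_url
+    return f"{image_url}#transform=inline"
+
+
+print(add_transform_inline("https://example.com/sample_resume.pdf", "llama-v3p3-70b-instruct"))
+# https://example.com/sample_resume.pdf#transform=inline
+```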
+
+
+
+
+```python
+import litellm
+import os
+
+os.environ["FIREWORKS_AI_API_KEY"] = "YOUR_API_KEY"
+# os.environ["FIREWORKS_AI_API_BASE"] = "https://api.fireworks.ai/inference/v1"  # optional, this is the default
+
+completion = litellm.completion(
+ model="fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://storage.googleapis.com/fireworks-public/test/sample_resume.pdf"
+ },
+ },
+ {
+ "type": "text",
+ "text": "What are the candidate's BA and MBA GPAs?",
+ },
+ ],
+ }
+ ],
+)
+print(completion)
+```
+
+
+
+
+1. Setup config.yaml
+
+```yaml
+model_list:
+ - model_name: llama-v3p3-70b-instruct
+ litellm_params:
+ model: fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct
+ api_key: os.environ/FIREWORKS_AI_API_KEY
+ # api_base: os.environ/FIREWORKS_AI_API_BASE [OPTIONAL], defaults to "https://api.fireworks.ai/inference/v1"
+```
+
+2. Start Proxy
+
+```
+litellm --config config.yaml
+```
+
+3. Test it
+
+```bash
+curl -L -X POST 'http://0.0.0.0:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer YOUR_API_KEY' \
+-d '{"model": "llama-v3p3-70b-instruct",
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://storage.googleapis.com/fireworks-public/test/sample_resume.pdf"
+ },
+ },
+ {
+ "type": "text",
+ "text": "What are the candidate's BA and MBA GPAs?",
+ },
+ ],
+ }
+ ]}'
+```
+
+
+
+
+### Disable Auto-add
+
+If you want to disable the auto-add of `#transform=inline` to the image URL, set `disable_add_transform_inline_image_block` to `true` — via `litellm` in the SDK or `litellm_settings` on the proxy, as shown below.
+
+
+
+
+```python
+litellm.disable_add_transform_inline_image_block = True
+```
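+
+You can also disable the rewrite for a single request by passing the flag directly to `litellm.completion()` (a minimal sketch; the message content is a placeholder):
+
+```python
+response = litellm.completion(
+    model="fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct",
+    messages=[{"role": "user", "content": "What are the candidate's BA and MBA GPAs?"}],
+    disable_add_transform_inline_image_block=True,  # skip the #transform=inline rewrite for this call
+)
+```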
+
+
+
+
+```yaml
+litellm_settings:
+ disable_add_transform_inline_image_block: true
+```
+
+
+
+
## Supported Models - ALL Fireworks AI Models Supported!
:::info
diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md
index 5559592407..ea5d104a71 100644
--- a/docs/my-website/docs/proxy/config_settings.md
+++ b/docs/my-website/docs/proxy/config_settings.md
@@ -138,6 +138,7 @@ general_settings:
| disable_end_user_cost_tracking | boolean | If true, turns off end user cost tracking on prometheus metrics + litellm spend logs table on proxy. |
| disable_end_user_cost_tracking_prometheus_only | boolean | If true, turns off end user cost tracking on prometheus metrics only. |
| key_generation_settings | object | Restricts who can generate keys. [Further docs](./virtual_keys.md#restricting-key-generation) |
+| disable_add_transform_inline_image_block | boolean | For Fireworks AI models - if true, disables automatically appending `#transform=inline` to image URLs for non-vision models. |
### general_settings - Reference
diff --git a/docs/my-website/docs/proxy/model_access.md b/docs/my-website/docs/proxy/model_access.md
new file mode 100644
index 0000000000..545d74865b
--- /dev/null
+++ b/docs/my-website/docs/proxy/model_access.md
@@ -0,0 +1,346 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Control Model Access
+
+## **Restrict models by Virtual Key**
+
+Set allowed models for a key using the `models` param
+
+
+```shell
+curl 'http://0.0.0.0:4000/key/generate' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{"models": ["gpt-3.5-turbo", "gpt-4"]}'
+```
+
+:::info
+
+This key can only make requests to `models` that are `gpt-3.5-turbo` or `gpt-4`
+
+:::
+
+Verify this is set correctly by sending a request with an allowed model, then one with a disallowed model:
+
+
+
+
+```shell
+curl -i http://localhost:4000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-1234" \
+ -d '{
+ "model": "gpt-4",
+ "messages": [
+ {"role": "user", "content": "Hello"}
+ ]
+ }'
+```
+
+
+
+
+
+:::info
+
+Expect this to fail, since `gpt-4o` is not in the `models` list for the generated key
+
+:::
+
+```shell
+curl -i http://localhost:4000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-1234" \
+ -d '{
+ "model": "gpt-4o",
+ "messages": [
+ {"role": "user", "content": "Hello"}
+ ]
+ }'
+```
+
+
+
+
+
+
+### [API Reference](https://litellm-api.up.railway.app/#/key%20management/generate_key_fn_key_generate_post)
+
+## **Restrict models by `team_id`**
+`litellm-dev` can only access `azure-gpt-3.5`
+
+**1. Create a team via `/team/new`**
+```shell
+curl --location 'http://localhost:4000/team/new' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{
+ "team_alias": "litellm-dev",
+ "models": ["azure-gpt-3.5"]
+}'
+
+# returns {...,"team_id": "my-unique-id"}
+```
+
+**2. Create a key for team**
+```shell
+curl --location 'http://localhost:4000/key/generate' \
+--header 'Authorization: Bearer sk-1234' \
+--header 'Content-Type: application/json' \
+--data-raw '{"team_id": "my-unique-id"}'
+```
+
+**3. Test it** (a request for a model outside the team's allowed list is rejected)
+```shell
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+ --header 'Content-Type: application/json' \
+ --header 'Authorization: Bearer sk-qo992IjKOC2CHKZGRoJIGA' \
+ --data '{
+ "model": "BEDROCK_GROUP",
+ "messages": [
+ {
+ "role": "user",
+ "content": "hi"
+ }
+ ]
+ }'
+```
+
+```shell
+{"error":{"message":"Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n\nTraceback (most recent call last):\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/proxy_server.py\", line 2298, in chat_completion\n _is_valid_team_configs(\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/utils.py\", line 1296, in _is_valid_team_configs\n raise Exception(\nException: Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n","type":"None","param":"None","code":500}}%
+```
+
+### [API Reference](https://litellm-api.up.railway.app/#/team%20management/new_team_team_new_post)
+
+
+## **Model Access Groups**
+
+Use model access groups to give users access to a select set of models, and add new models to the group over time (e.g. mistral, llama-2, etc.)
+
+**Step 1. Assign model, access group in config.yaml**
+
+```yaml
+model_list:
+ - model_name: gpt-4
+ litellm_params:
+ model: openai/fake
+ api_key: fake-key
+ api_base: https://exampleopenaiendpoint-production.up.railway.app/
+ model_info:
+ access_groups: ["beta-models"] # 👈 Model Access Group
+ - model_name: fireworks-llama-v3-70b-instruct
+ litellm_params:
+ model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
+ api_key: "os.environ/FIREWORKS"
+ model_info:
+ access_groups: ["beta-models"] # 👈 Model Access Group
+```
+
+
+
+
+
+**Create key with access group**
+
+```bash
+curl --location 'http://localhost:4000/key/generate' \
+-H 'Authorization: Bearer ' \
+-H 'Content-Type: application/json' \
+-d '{"models": ["beta-models"], # 👈 Model Access Group
+ "max_budget": 0,}'
+```
+
+Test Key
+
+
+
+
+```shell
+curl -i http://localhost:4000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-" \
+ -d '{
+ "model": "gpt-4",
+ "messages": [
+ {"role": "user", "content": "Hello"}
+ ]
+ }'
+```
+
+
+
+
+
+:::info
+
+Expect this to fail since gpt-4o is not in the `beta-models` access group
+
+:::
+
+```shell
+curl -i http://localhost:4000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-" \
+ -d '{
+ "model": "gpt-4o",
+ "messages": [
+ {"role": "user", "content": "Hello"}
+ ]
+ }'
+```
+
+
+
+
+
+
+
+
+
+Create Team
+
+```shell
+curl --location 'http://localhost:4000/team/new' \
+-H 'Authorization: Bearer sk-' \
+-H 'Content-Type: application/json' \
+-d '{"models": ["beta-models"]}'
+```
+
+Create Key for Team
+
+```shell
+curl --location 'http://0.0.0.0:4000/key/generate' \
+--header 'Authorization: Bearer sk-' \
+--header 'Content-Type: application/json' \
+--data '{"team_id": "0ac97648-c194-4c90-8cd6-40af7b0d2d2a"}
+```
+
+
+Test Key
+
+
+
+
+```shell
+curl -i http://localhost:4000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-" \
+ -d '{
+ "model": "gpt-4",
+ "messages": [
+ {"role": "user", "content": "Hello"}
+ ]
+ }'
+```
+
+
+
+
+
+:::info
+
+Expect this to fail since gpt-4o is not in the `beta-models` access group
+
+:::
+
+```shell
+curl -i http://localhost:4000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-" \
+ -d '{
+ "model": "gpt-4o",
+ "messages": [
+ {"role": "user", "content": "Hello"}
+ ]
+ }'
+```
+
+
+
+
+
+
+
+
+
+
+### ✨ Control Access on Wildcard Models
+
+Control access to all models with a specific prefix (e.g. `openai/*`).
+
+You can also use this to give users access to all models except a few you want to restrict (e.g. `openai/o1-*`).
+
+:::info
+
+Setting model access groups on wildcard models is an Enterprise feature.
+
+See pricing [here](https://litellm.ai/#pricing)
+
+Get a trial key [here](https://litellm.ai/#trial)
+:::
+
+
+1. Setup config.yaml
+
+
+```yaml
+model_list:
+ - model_name: openai/*
+ litellm_params:
+ model: openai/*
+ api_key: os.environ/OPENAI_API_KEY
+ model_info:
+ access_groups: ["default-models"]
+ - model_name: openai/o1-*
+ litellm_params:
+ model: openai/o1-*
+ api_key: os.environ/OPENAI_API_KEY
+ model_info:
+ access_groups: ["restricted-models"]
+```
+
+2. Generate a key with access to `default-models`
+
+```bash
+curl -L -X POST 'http://0.0.0.0:4000/key/generate' \
+-H 'Authorization: Bearer sk-1234' \
+-H 'Content-Type: application/json' \
+-d '{
+ "models": ["default-models"],
+}'
+```
+
+3. Test the key
+
+
+
+
+```bash
+curl -i http://localhost:4000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-" \
+ -d '{
+ "model": "openai/gpt-4",
+ "messages": [
+ {"role": "user", "content": "Hello"}
+ ]
+ }'
+```
+
+:::info
+
+Expect this to fail since `openai/o1-mini` is in the `restricted-models` access group and the key only has access to `default-models`
+
+:::
+
+```bash
+curl -i http://localhost:4000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-" \
+ -d '{
+ "model": "openai/o1-mini",
+ "messages": [
+ {"role": "user", "content": "Hello"}
+ ]
+ }'
+```
+
+
+
diff --git a/docs/my-website/docs/proxy/virtual_keys.md b/docs/my-website/docs/proxy/virtual_keys.md
index 2107698f32..254b50bca3 100644
--- a/docs/my-website/docs/proxy/virtual_keys.md
+++ b/docs/my-website/docs/proxy/virtual_keys.md
@@ -224,272 +224,13 @@ Expected Response
-## **Model Access**
-### **Restrict models by Virtual Key**
-
-Set allowed models for a key using the `models` param
-
-
-```shell
-curl 'http://0.0.0.0:4000/key/generate' \
---header 'Authorization: Bearer ' \
---header 'Content-Type: application/json' \
---data-raw '{"models": ["gpt-3.5-turbo", "gpt-4"]}'
-```
-
-:::info
-
-This key can only make requests to `models` that are `gpt-3.5-turbo` or `gpt-4`
-
-:::
-
-Verify this is set correctly by
-
-
-
-
-```shell
-curl -i http://localhost:4000/v1/chat/completions \
- -H "Content-Type: application/json" \
- -H "Authorization: Bearer sk-1234" \
- -d '{
- "model": "gpt-4",
- "messages": [
- {"role": "user", "content": "Hello"}
- ]
- }'
-```
-
-
-
-
-
-:::info
-
-Expect this to fail since gpt-4o is not in the `models` for the key generated
-
-:::
-
-```shell
-curl -i http://localhost:4000/v1/chat/completions \
- -H "Content-Type: application/json" \
- -H "Authorization: Bearer sk-1234" \
- -d '{
- "model": "gpt-4o",
- "messages": [
- {"role": "user", "content": "Hello"}
- ]
- }'
-```
-
-
-
-
-
-### **Restrict models by `team_id`**
-`litellm-dev` can only access `azure-gpt-3.5`
-
-**1. Create a team via `/team/new`**
-```shell
-curl --location 'http://localhost:4000/team/new' \
---header 'Authorization: Bearer ' \
---header 'Content-Type: application/json' \
---data-raw '{
- "team_alias": "litellm-dev",
- "models": ["azure-gpt-3.5"]
-}'
-
-# returns {...,"team_id": "my-unique-id"}
-```
-
-**2. Create a key for team**
-```shell
-curl --location 'http://localhost:4000/key/generate' \
---header 'Authorization: Bearer sk-1234' \
---header 'Content-Type: application/json' \
---data-raw '{"team_id": "my-unique-id"}'
-```
-
-**3. Test it**
-```shell
-curl --location 'http://0.0.0.0:4000/chat/completions' \
- --header 'Content-Type: application/json' \
- --header 'Authorization: Bearer sk-qo992IjKOC2CHKZGRoJIGA' \
- --data '{
- "model": "BEDROCK_GROUP",
- "messages": [
- {
- "role": "user",
- "content": "hi"
- }
- ]
- }'
-```
-
-```shell
-{"error":{"message":"Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n\nTraceback (most recent call last):\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/proxy_server.py\", line 2298, in chat_completion\n _is_valid_team_configs(\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/utils.py\", line 1296, in _is_valid_team_configs\n raise Exception(\nException: Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n","type":"None","param":"None","code":500}}%
-```
-
-### **Grant Access to new model (Access Groups)**
-
-Use model access groups to give users access to select models, and add new ones to it over time (e.g. mistral, llama-2, etc.)
-
-**Step 1. Assign model, access group in config.yaml**
-
-```yaml
-model_list:
- - model_name: gpt-4
- litellm_params:
- model: openai/fake
- api_key: fake-key
- api_base: https://exampleopenaiendpoint-production.up.railway.app/
- model_info:
- access_groups: ["beta-models"] # 👈 Model Access Group
- - model_name: fireworks-llama-v3-70b-instruct
- litellm_params:
- model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
- api_key: "os.environ/FIREWORKS"
- model_info:
- access_groups: ["beta-models"] # 👈 Model Access Group
-```
-
-
-
-
-
-**Create key with access group**
-
-```bash
-curl --location 'http://localhost:4000/key/generate' \
--H 'Authorization: Bearer ' \
--H 'Content-Type: application/json' \
--d '{"models": ["beta-models"], # 👈 Model Access Group
- "max_budget": 0,}'
-```
-
-Test Key
-
-
-
-
-```shell
-curl -i http://localhost:4000/v1/chat/completions \
- -H "Content-Type: application/json" \
- -H "Authorization: Bearer sk-" \
- -d '{
- "model": "gpt-4",
- "messages": [
- {"role": "user", "content": "Hello"}
- ]
- }'
-```
-
-
-
-
-
-:::info
-
-Expect this to fail since gpt-4o is not in the `beta-models` access group
-
-:::
-
-```shell
-curl -i http://localhost:4000/v1/chat/completions \
- -H "Content-Type: application/json" \
- -H "Authorization: Bearer sk-" \
- -d '{
- "model": "gpt-4o",
- "messages": [
- {"role": "user", "content": "Hello"}
- ]
- }'
-```
-
-
-
-
-
-
-
-
-
-Create Team
-
-```shell
-curl --location 'http://localhost:4000/team/new' \
--H 'Authorization: Bearer sk-' \
--H 'Content-Type: application/json' \
--d '{"models": ["beta-models"]}'
-```
-
-Create Key for Team
-
-```shell
-curl --location 'http://0.0.0.0:4000/key/generate' \
---header 'Authorization: Bearer sk-' \
---header 'Content-Type: application/json' \
---data '{"team_id": "0ac97648-c194-4c90-8cd6-40af7b0d2d2a"}
-```
-
-
-Test Key
-
-
-
-
-```shell
-curl -i http://localhost:4000/v1/chat/completions \
- -H "Content-Type: application/json" \
- -H "Authorization: Bearer sk-" \
- -d '{
- "model": "gpt-4",
- "messages": [
- {"role": "user", "content": "Hello"}
- ]
- }'
-```
-
-
-
-
-
-:::info
-
-Expect this to fail since gpt-4o is not in the `beta-models` access group
-
-:::
-
-```shell
-curl -i http://localhost:4000/v1/chat/completions \
- -H "Content-Type: application/json" \
- -H "Authorization: Bearer sk-" \
- -d '{
- "model": "gpt-4o",
- "messages": [
- {"role": "user", "content": "Hello"}
- ]
- }'
-```
-
-
-
-
-
-
-
-
-
-
-### Model Aliases
+## Model Aliases
If a user is expected to use a given model (i.e. gpt3-5), and you want to:
- try to upgrade the request (i.e. GPT4)
- or downgrade it (i.e. Mistral)
-- OR rotate the API KEY (i.e. open AI)
-- OR access the same model through different end points (i.e. openAI vs openrouter vs Azure)
Here's how you can do that:
@@ -509,13 +250,13 @@ model_list:
litellm_params:
model: huggingface/HuggingFaceH4/zephyr-7b-beta
api_base: http://0.0.0.0:8003
- - model_name: my-paid-tier
+ - model_name: my-paid-tier
litellm_params:
model: gpt-4
api_key: my-api-key
```
-**Step 2: Generate a user key - enabling them access to specific models, custom model aliases, etc.**
+**Step 2: Generate a key**
```bash
curl -X POST "https://0.0.0.0:4000/key/generate" \
@@ -523,13 +264,29 @@ curl -X POST "https://0.0.0.0:4000/key/generate" \
-H "Content-Type: application/json" \
-d '{
"models": ["my-free-tier"],
- "aliases": {"gpt-3.5-turbo": "my-free-tier"},
+ "aliases": {"gpt-3.5-turbo": "my-free-tier"}, # 👈 KEY CHANGE
"duration": "30min"
}'
```
- **How to upgrade / downgrade request?** Change the alias mapping
-- **How are routing between diff keys/api bases done?** litellm handles this by shuffling between different models in the model list with the same model_name. [**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/router.py)
+
+**Step 3: Test the key**
+
+```bash
+curl -X POST "https://0.0.0.0:4000/key/generate" \
+-H "Authorization: Bearer " \
+-H "Content-Type: application/json" \
+-d '{
+ "model": "gpt-3.5-turbo",
+ "messages": [
+ {
+ "role": "user",
+ "content": "this is a test request, write a short poem"
+ }
+ ]
+}'
+```
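+
+To upgrade the request, change the alias mapping: you could instead generate a key whose alias points `gpt-3.5-turbo` at the paid-tier model from the config above (illustrative sketch; the Authorization header is a placeholder):
+
+```bash
+curl -X POST "https://0.0.0.0:4000/key/generate" \
+-H "Authorization: Bearer " \
+-H "Content-Type: application/json" \
+-d '{
+    "models": ["my-paid-tier"],
+    "aliases": {"gpt-3.5-turbo": "my-paid-tier"},
+    "duration": "30min"
+}'
+```
+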
## Advanced
diff --git a/docs/my-website/docs/wildcard_routing.md b/docs/my-website/docs/wildcard_routing.md
index 80926d73e5..5cb5b8d9b9 100644
--- a/docs/my-website/docs/wildcard_routing.md
+++ b/docs/my-website/docs/wildcard_routing.md
@@ -138,3 +138,6 @@ curl http://localhost:4000/v1/chat/completions \
+
+
+## [[PROXY-Only] Control Wildcard Model Access](./proxy/model_access#-control-access-on-wildcard-models)
\ No newline at end of file
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index bbf951b791..8f237f05b6 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -81,6 +81,14 @@ const sidebars = {
"proxy/multiple_admins",
],
},
+ {
+ type: "category",
+ label: "Model Access",
+ items: [
+ "proxy/model_access",
+ "proxy/team_model_add"
+ ]
+ },
{
type: "category",
label: "Admin UI",
@@ -91,13 +99,6 @@ const sidebars = {
"proxy/custom_sso"
],
},
- {
- type: "category",
- label: "Team Management",
- items: [
- "proxy/team_model_add"
- ],
- },
{
type: "category",
label: "Spend Tracking",
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 48d5172e41..212f1514c3 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -151,6 +151,7 @@ use_client: bool = False
ssl_verify: Union[str, bool] = True
ssl_certificate: Optional[str] = None
disable_streaming_logging: bool = False
+disable_add_transform_inline_image_block: bool = False
in_memory_llm_clients_cache: InMemoryCache = InMemoryCache()
safe_memory_mode: bool = False
enable_azure_ad_token_refresh: Optional[bool] = False
diff --git a/litellm/llms/base_llm/base_utils.py b/litellm/llms/base_llm/base_utils.py
new file mode 100644
index 0000000000..dca8c2504c
--- /dev/null
+++ b/litellm/llms/base_llm/base_utils.py
@@ -0,0 +1,9 @@
+from abc import ABC, abstractmethod
+
+from litellm.types.utils import ModelInfoBase
+
+
+class BaseLLMModelInfo(ABC):
+ @abstractmethod
+ def get_model_info(self, model: str) -> ModelInfoBase:
+ pass
diff --git a/litellm/llms/fireworks_ai/chat/transformation.py b/litellm/llms/fireworks_ai/chat/transformation.py
index 4753cbc00e..0879d2579f 100644
--- a/litellm/llms/fireworks_ai/chat/transformation.py
+++ b/litellm/llms/fireworks_ai/chat/transformation.py
@@ -1,12 +1,15 @@
-from typing import List, Literal, Optional, Tuple, Union
+from typing import List, Literal, Optional, Tuple, Union, cast
+import litellm
+from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.secret_managers.main import get_secret_str
-from litellm.types.llms.openai import AllMessageValues
+from litellm.types.llms.openai import AllMessageValues, ChatCompletionImageObject
+from litellm.types.utils import ModelInfoBase, ProviderSpecificModelInfo
from ...openai.chat.gpt_transformation import OpenAIGPTConfig
-class FireworksAIConfig(OpenAIGPTConfig):
+class FireworksAIConfig(BaseLLMModelInfo, OpenAIGPTConfig):
"""
Reference: https://docs.fireworks.ai/api-reference/post-chatcompletions
@@ -110,6 +113,80 @@ class FireworksAIConfig(OpenAIGPTConfig):
optional_params[param] = value
return optional_params
+ def _add_transform_inline_image_block(
+ self,
+ content: ChatCompletionImageObject,
+ model: str,
+ disable_add_transform_inline_image_block: Optional[bool],
+ ) -> ChatCompletionImageObject:
+ """
+ Add transform_inline to the image_url (allows non-vision models to parse documents/images/etc.)
+ - ignore if model is a vision model
+ - ignore if user has disabled this feature
+ """
+ if (
+ "vision" in model or disable_add_transform_inline_image_block
+ ): # allow user to toggle this feature.
+ return content
+ if isinstance(content["image_url"], str):
+ content["image_url"] = f"{content['image_url']}#transform=inline"
+ elif isinstance(content["image_url"], dict):
+ content["image_url"][
+ "url"
+ ] = f"{content['image_url']['url']}#transform=inline"
+ return content
+
+ def _transform_messages_helper(
+ self, messages: List[AllMessageValues], model: str, litellm_params: dict
+ ) -> List[AllMessageValues]:
+ """
+ Add 'transform=inline' to the url of the image_url
+ """
+ disable_add_transform_inline_image_block = cast(
+ Optional[bool],
+ litellm_params.get(
+ "disable_add_transform_inline_image_block",
+ litellm.disable_add_transform_inline_image_block,
+ ),
+ )
+ for message in messages:
+ if message["role"] == "user":
+ _message_content = message.get("content")
+ if _message_content is not None and isinstance(_message_content, list):
+ for content in _message_content:
+ if content["type"] == "image_url":
+ content = self._add_transform_inline_image_block(
+ content=content,
+ model=model,
+ disable_add_transform_inline_image_block=disable_add_transform_inline_image_block,
+ )
+ return messages
+
+ def get_model_info(
+ self, model: str, existing_model_info: Optional[ModelInfoBase] = None
+ ) -> ModelInfoBase:
+ provider_specific_model_info = ProviderSpecificModelInfo(
+ supports_function_calling=True,
+ supports_prompt_caching=True, # https://docs.fireworks.ai/guides/prompt-caching
+ supports_pdf_input=True, # via document inlining
+ supports_vision=True, # via document inlining
+ )
+ if existing_model_info is not None:
+ return ModelInfoBase(
+ **{**existing_model_info, **provider_specific_model_info}
+ )
+ return ModelInfoBase(
+ key=model,
+ litellm_provider="fireworks_ai",
+ mode="chat",
+ input_cost_per_token=0.0,
+ output_cost_per_token=0.0,
+ max_tokens=None,
+ max_input_tokens=None,
+ max_output_tokens=None,
+ **provider_specific_model_info,
+ )
+
def transform_request(
self,
model: str,
@@ -120,6 +197,9 @@ class FireworksAIConfig(OpenAIGPTConfig):
) -> dict:
if not model.startswith("accounts/"):
model = f"accounts/fireworks/models/{model}"
+ messages = self._transform_messages_helper(
+ messages=messages, model=model, litellm_params=litellm_params
+ )
return super().transform_request(
model=model,
messages=messages,
diff --git a/litellm/main.py b/litellm/main.py
index e36774b2b7..928fc47d9e 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -899,6 +899,10 @@ def completion( # type: ignore # noqa: PLR0915
hf_model_name = kwargs.get("hf_model_name", None)
supports_system_message = kwargs.get("supports_system_message", None)
base_model = kwargs.get("base_model", None)
+ ### DISABLE FLAGS ###
+ disable_add_transform_inline_image_block = kwargs.get(
+ "disable_add_transform_inline_image_block", None
+ )
### TEXT COMPLETION CALLS ###
text_completion = kwargs.get("text_completion", False)
atext_completion = kwargs.get("atext_completion", False)
@@ -956,14 +960,11 @@ def completion( # type: ignore # noqa: PLR0915
"top_logprobs",
"extra_headers",
]
-
default_params = openai_params + all_litellm_params
-
litellm_params = {} # used to prevent unbound var errors
non_default_params = {
k: v for k, v in kwargs.items() if k not in default_params
} # model-specific params - pass them straight to the model/provider
-
## PROMPT MANAGEMENT HOOKS ##
if isinstance(litellm_logging_obj, LiteLLMLoggingObj) and prompt_id is not None:
@@ -1156,6 +1157,7 @@ def completion( # type: ignore # noqa: PLR0915
hf_model_name=hf_model_name,
custom_prompt_dict=custom_prompt_dict,
litellm_metadata=kwargs.get("litellm_metadata"),
+ disable_add_transform_inline_image_block=disable_add_transform_inline_image_block,
)
logging.update_environment_variables(
model=model,
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 9c37d84f8a..86d7f72f84 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,17 +1,13 @@
model_list:
- - model_name: model-test
+ - model_name: openai/*
litellm_params:
- model: openai/gpt-3.5-turbo
+ model: openai/*
api_key: os.environ/OPENAI_API_KEY
- mock_response: "Hello, world!"
- rpm: 1
- - model_name: model-test
+ model_info:
+ access_groups: ["default-models"]
+ - model_name: openai/o1-*
litellm_params:
- model: openai/o1-mini
+ model: openai/o1-*
api_key: os.environ/OPENAI_API_KEY
- mock_response: "Hello, world, it's o1!"
- rpm: 10
-
-router_settings:
- routing_strategy: usage-based-routing-v2
- disable_cooldowns: True
+ model_info:
+ access_groups: ["restricted-models"]
\ No newline at end of file
diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py
index 5550309069..580f23687a 100644
--- a/litellm/proxy/management_endpoints/key_management_endpoints.py
+++ b/litellm/proxy/management_endpoints/key_management_endpoints.py
@@ -16,7 +16,7 @@ import secrets
import traceback
import uuid
from datetime import datetime, timedelta, timezone
-from typing import List, Optional, Tuple, cast
+from typing import List, Literal, Optional, Tuple, cast
import fastapi
from fastapi import APIRouter, Depends, Header, HTTPException, Query, Request, status
@@ -38,6 +38,7 @@ from litellm.proxy.utils import (
duration_in_seconds,
handle_exception_on_proxy,
)
+from litellm.router import Router
from litellm.secret_managers.main import get_secret
from litellm.types.utils import (
BudgetConfig,
@@ -330,6 +331,8 @@ async def generate_key_fn( # noqa: PLR0915
try:
from litellm.proxy.proxy_server import (
litellm_proxy_admin_name,
+ llm_router,
+ premium_user,
prisma_client,
user_api_key_cache,
user_custom_key_generate,
@@ -386,6 +389,12 @@ async def generate_key_fn( # noqa: PLR0915
detail=str(e),
)
+ _check_model_access_group(
+ models=data.models,
+ llm_router=llm_router,
+ premium_user=premium_user,
+ )
+
# check if user set default key/generate params on config.yaml
if litellm.default_key_generate_params is not None:
for elem in data:
@@ -992,6 +1001,34 @@ async def info_key_fn(
raise handle_exception_on_proxy(e)
+def _check_model_access_group(
+ models: Optional[List[str]], llm_router: Optional[Router], premium_user: bool
+) -> Literal[True]:
+ """
+    If any of the requested models is a model access group containing a wildcard route, require the user to be a premium (Enterprise) user.
+
+    Returns True if the check passes; otherwise raises an HTTPException (403).
+ """
+ if models is None or llm_router is None:
+ return True
+
+ for model in models:
+ if llm_router._is_model_access_group_for_wildcard_route(
+ model_access_group=model
+ ):
+ if not premium_user:
+ raise HTTPException(
+ status_code=status.HTTP_403_FORBIDDEN,
+ detail={
+ "error": "Setting a model access group on a wildcard model is only available for LiteLLM Enterprise users.{}".format(
+ CommonProxyErrors.not_premium_user.value
+ )
+ },
+ )
+
+ return True
+
+
async def generate_key_helper_fn( # noqa: PLR0915
request_type: Literal[
"user", "key"
diff --git a/litellm/router.py b/litellm/router.py
index 065905503e..9657e89e58 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -4713,10 +4713,14 @@ class Router:
return None
def get_model_access_groups(
- self, model_name: Optional[str] = None
+ self, model_name: Optional[str] = None, model_access_group: Optional[str] = None
) -> Dict[str, List[str]]:
"""
If model_name is provided, only return access groups for that model.
+
+ Parameters:
+ - model_name: Optional[str] - the received model name from the user (can be a wildcard route). If set, will only return access groups for that model.
+ - model_access_group: Optional[str] - the received model access group from the user. If set, will only return models for that access group.
"""
from collections import defaultdict
@@ -4726,11 +4730,39 @@ class Router:
if model_list:
for m in model_list:
for group in m.get("model_info", {}).get("access_groups", []):
- model_name = m["model_name"]
- access_groups[group].append(model_name)
+ if model_access_group is not None:
+ if group == model_access_group:
+ model_name = m["model_name"]
+ access_groups[group].append(model_name)
+ else:
+ model_name = m["model_name"]
+ access_groups[group].append(model_name)
return access_groups
+ def _is_model_access_group_for_wildcard_route(
+ self, model_access_group: str
+ ) -> bool:
+ """
+        Return True if any model in the given access group is a wildcard route
+ """
+ # GET ACCESS GROUPS
+ access_groups = self.get_model_access_groups(
+ model_access_group=model_access_group
+ )
+
+ if len(access_groups) == 0:
+ return False
+
+ models = access_groups.get(model_access_group, [])
+
+ for model in models:
+ # CHECK IF MODEL ACCESS GROUP IS A WILDCARD ROUTE
+ if self.pattern_router.route(request=model) is not None:
+ return True
+
+ return False
+
def get_settings(self):
"""
Get router settings method, returns a dictionary of the settings and their values.
diff --git a/litellm/router_utils/pattern_match_deployments.py b/litellm/router_utils/pattern_match_deployments.py
index a0d590f23c..729510574a 100644
--- a/litellm/router_utils/pattern_match_deployments.py
+++ b/litellm/router_utils/pattern_match_deployments.py
@@ -128,7 +128,7 @@ class PatternMatchRouter:
if no pattern is found, return None
Args:
- request: Optional[str]
+            request: str - the received model name from the user (can be a wildcard route). If None, no deployments will be returned.
filtered_model_names: Optional[List[str]] - if provided, only return deployments that match the filtered_model_names
Returns:
Optional[List[Deployment]]: llm deployments
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index ed9f2ee98c..d8b4bf282f 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -75,7 +75,20 @@ class ProviderField(TypedDict):
field_value: str
-class ModelInfoBase(TypedDict, total=False):
+class ProviderSpecificModelInfo(TypedDict, total=False):
+ supports_system_messages: Optional[bool]
+ supports_response_schema: Optional[bool]
+ supports_vision: Optional[bool]
+ supports_function_calling: Optional[bool]
+ supports_assistant_prefill: Optional[bool]
+ supports_prompt_caching: Optional[bool]
+ supports_audio_input: Optional[bool]
+ supports_embedding_image_input: Optional[bool]
+ supports_audio_output: Optional[bool]
+ supports_pdf_input: Optional[bool]
+
+
+class ModelInfoBase(ProviderSpecificModelInfo, total=False):
key: Required[str] # the key in litellm.model_cost which is returned
max_tokens: Required[Optional[int]]
@@ -116,16 +129,6 @@ class ModelInfoBase(TypedDict, total=False):
"completion", "embedding", "image_generation", "chat", "audio_transcription"
]
]
- supports_system_messages: Optional[bool]
- supports_response_schema: Optional[bool]
- supports_vision: Optional[bool]
- supports_function_calling: Optional[bool]
- supports_assistant_prefill: Optional[bool]
- supports_prompt_caching: Optional[bool]
- supports_audio_input: Optional[bool]
- supports_embedding_image_input: Optional[bool]
- supports_audio_output: Optional[bool]
- supports_pdf_input: Optional[bool]
tpm: Optional[int]
rpm: Optional[int]
@@ -1613,6 +1616,7 @@ all_litellm_params = [
"caching",
"mock_response",
"mock_timeout",
+ "disable_add_transform_inline_image_block",
"api_key",
"api_version",
"prompt_id",
diff --git a/litellm/utils.py b/litellm/utils.py
index 6decadc719..37e50b55ad 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -174,6 +174,7 @@ from openai import OpenAIError as OriginalError
from litellm.llms.base_llm.audio_transcription.transformation import (
BaseAudioTranscriptionConfig,
)
+from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.llms.base_llm.chat.transformation import BaseConfig
from litellm.llms.base_llm.completion.transformation import BaseTextCompletionConfig
from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
@@ -1989,6 +1990,7 @@ def get_litellm_params(
hf_model_name: Optional[str] = None,
custom_prompt_dict: Optional[dict] = None,
litellm_metadata: Optional[dict] = None,
+ disable_add_transform_inline_image_block: Optional[bool] = None,
):
litellm_params = {
"acompletion": acompletion,
@@ -2021,6 +2023,7 @@ def get_litellm_params(
"hf_model_name": hf_model_name,
"custom_prompt_dict": custom_prompt_dict,
"litellm_metadata": litellm_metadata,
+ "disable_add_transform_inline_image_block": disable_add_transform_inline_image_block,
}
return litellm_params
@@ -4373,6 +4376,17 @@ def _get_model_info_helper( # noqa: PLR0915
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
+ if _model_info is None and ProviderConfigManager.get_provider_model_info(
+ model=model, provider=LlmProviders(custom_llm_provider)
+ ):
+ provider_config = ProviderConfigManager.get_provider_model_info(
+ model=model, provider=LlmProviders(custom_llm_provider)
+ )
+ if provider_config is not None:
+ _model_info = cast(
+ dict, provider_config.get_model_info(model=model)
+ )
+ key = "provider_specific_model_info"
if _model_info is None or key is None:
raise ValueError(
"This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
@@ -6338,6 +6352,15 @@ class ProviderConfigManager:
return litellm.TogetherAITextCompletionConfig()
return litellm.OpenAITextCompletionConfig()
+ @staticmethod
+ def get_provider_model_info(
+ model: str,
+ provider: LlmProviders,
+ ) -> Optional[BaseLLMModelInfo]:
+ if LlmProviders.FIREWORKS_AI == provider:
+ return litellm.FireworksAIConfig()
+ return None
+
def get_end_user_id_for_cost_tracking(
litellm_params: dict,
diff --git a/tests/llm_translation/test_fireworks_ai_translation.py b/tests/llm_translation/test_fireworks_ai_translation.py
index f91402a86a..f38efd96dc 100644
--- a/tests/llm_translation/test_fireworks_ai_translation.py
+++ b/tests/llm_translation/test_fireworks_ai_translation.py
@@ -103,3 +103,96 @@ class TestFireworksAIAudioTranscription(BaseLLMAudioTranscriptionTest):
def get_custom_llm_provider(self) -> litellm.LlmProviders:
return litellm.LlmProviders.FIREWORKS_AI
+
+
+@pytest.mark.parametrize(
+ "disable_add_transform_inline_image_block",
+ [True, False],
+)
+def test_document_inlining_example(disable_add_transform_inline_image_block):
+ litellm.set_verbose = True
+ if disable_add_transform_inline_image_block is True:
+ with pytest.raises(Exception):
+ completion = litellm.completion(
+ model="fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://storage.googleapis.com/fireworks-public/test/sample_resume.pdf"
+ },
+ },
+ {
+ "type": "text",
+ "text": "What are the candidate's BA and MBA GPAs?",
+ },
+ ],
+ }
+ ],
+ disable_add_transform_inline_image_block=disable_add_transform_inline_image_block,
+ )
+ else:
+ completion = litellm.completion(
+ model="fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct",
+ messages=[
+ {
+ "role": "user",
+ "content": "this is a test request, write a short poem",
+ },
+ ],
+ disable_add_transform_inline_image_block=disable_add_transform_inline_image_block,
+ )
+ print(completion)
+
+
+@pytest.mark.parametrize(
+ "content, model, expected_url",
+ [
+ (
+ {"image_url": "http://example.com/image.png"},
+ "gpt-4",
+ "http://example.com/image.png#transform=inline",
+ ),
+ (
+ {"image_url": {"url": "http://example.com/image.png"}},
+ "gpt-4",
+ {"url": "http://example.com/image.png#transform=inline"},
+ ),
+ (
+ {"image_url": "http://example.com/image.png"},
+ "vision-gpt",
+ "http://example.com/image.png",
+ ),
+ ],
+)
+def test_transform_inline(content, model, expected_url):
+
+ result = litellm.FireworksAIConfig()._add_transform_inline_image_block(
+ content=content, model=model, disable_add_transform_inline_image_block=False
+ )
+ if isinstance(expected_url, str):
+ assert result["image_url"] == expected_url
+ else:
+ assert result["image_url"]["url"] == expected_url["url"]
+
+
+@pytest.mark.parametrize(
+ "model, is_disabled, expected_url",
+ [
+ ("gpt-4", True, "http://example.com/image.png"),
+ ("vision-gpt", False, "http://example.com/image.png"),
+ ("gpt-4", False, "http://example.com/image.png#transform=inline"),
+ ],
+)
+def test_global_disable_flag(model, is_disabled, expected_url):
+ content = {"image_url": "http://example.com/image.png"}
+ result = litellm.FireworksAIConfig()._add_transform_inline_image_block(
+ content=content,
+ model=model,
+ disable_add_transform_inline_image_block=is_disabled,
+ )
+ assert result["image_url"] == expected_url
+ litellm.disable_add_transform_inline_image_block = False # Reset for other tests
diff --git a/tests/local_testing/test_router_utils.py b/tests/local_testing/test_router_utils.py
index 706fdc55f4..fa1c6f5f9e 100644
--- a/tests/local_testing/test_router_utils.py
+++ b/tests/local_testing/test_router_utils.py
@@ -364,3 +364,23 @@ async def test_get_remaining_model_group_usage():
assert remaining_usage is not None
assert "x-ratelimit-remaining-requests" in remaining_usage
assert "x-ratelimit-remaining-tokens" in remaining_usage
+
+
+@pytest.mark.parametrize(
+ "potential_access_group, expected_result",
+ [("gemini-models", True), ("gemini-models-2", False), ("gemini/*", False)],
+)
+def test_router_get_model_access_groups(potential_access_group, expected_result):
+ router = Router(
+ model_list=[
+ {
+ "model_name": "gemini/*",
+ "litellm_params": {"model": "gemini/*"},
+ "model_info": {"id": 1, "access_groups": ["gemini-models"]},
+ },
+ ]
+ )
+ access_groups = router._is_model_access_group_for_wildcard_route(
+ model_access_group=potential_access_group
+ )
+ assert access_groups == expected_result
diff --git a/tests/local_testing/test_utils.py b/tests/local_testing/test_utils.py
index 7240d0dba4..28cff52e95 100644
--- a/tests/local_testing/test_utils.py
+++ b/tests/local_testing/test_utils.py
@@ -1240,3 +1240,15 @@ def test_token_counter_with_image_url_with_detail_high():
)
print("tokens", _tokens)
assert _tokens == DEFAULT_IMAGE_TOKEN_COUNT + 7
+
+
+def test_fireworks_ai_document_inlining():
+ """
+ With document inlining, all fireworks ai models are now:
+ - supports_pdf
+ - supports_vision
+ """
+ from litellm.utils import supports_pdf_input, supports_vision
+
+ assert supports_pdf_input("fireworks_ai/llama-3.1-8b-instruct") is True
+ assert supports_vision("fireworks_ai/llama-3.1-8b-instruct") is True