diff --git a/docs/my-website/docs/providers/fireworks_ai.md b/docs/my-website/docs/providers/fireworks_ai.md
index aa35472220..98d7c33ce7 100644
--- a/docs/my-website/docs/providers/fireworks_ai.md
+++ b/docs/my-website/docs/providers/fireworks_ai.md
@@ -190,6 +190,116 @@ print(response)
+## Document Inlining
+
+LiteLLM supports document inlining for Fireworks AI models. This is useful for models that are not vision models but still need to parse documents, images, etc.
+
+LiteLLM will add `#transform=inline` to the image URL if the model is not a vision model. [**See Code**](https://github.com/BerriAI/litellm/blob/1ae9d45798bdaf8450f2dfdec703369f3d2212b7/litellm/llms/fireworks_ai/chat/transformation.py#L114)
+
+
+
+
+```python
+import litellm
+import os
+
+os.environ["FIREWORKS_AI_API_KEY"] = "YOUR_API_KEY"
+os.environ["FIREWORKS_AI_API_BASE"] = "https://api.fireworks.ai/inference/v1"
+
+completion = litellm.completion(
+    model="fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": "https://storage.googleapis.com/fireworks-public/test/sample_resume.pdf"
+                    },
+                },
+                {
+                    "type": "text",
+                    "text": "What are the candidate's BA and MBA GPAs?",
+                },
+            ],
+        }
+    ],
+)
+print(completion)
+```
+
+
+
+
+1. Setup config.yaml
+
+```yaml
+model_list:
+  - model_name: llama-v3p3-70b-instruct
+    litellm_params:
+      model: fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct
+      api_key: os.environ/FIREWORKS_AI_API_KEY
+      # api_base: os.environ/FIREWORKS_AI_API_BASE [OPTIONAL], defaults to "https://api.fireworks.ai/inference/v1"
+```
+
+2. Start Proxy
+
+```
+litellm --config config.yaml
+```
+
+3. Test it
+
+```bash
+curl -L -X POST 'http://0.0.0.0:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer YOUR_API_KEY' \
+-d '{"model": "llama-v3p3-70b-instruct",
+  "messages": [
+    {
+      "role": "user",
+      "content": [
+        {
+          "type": "image_url",
+          "image_url": {
+            "url": "https://storage.googleapis.com/fireworks-public/test/sample_resume.pdf"
+          }
+        },
+        {
+          "type": "text",
+          "text": "What are the candidate'"'"'s BA and MBA GPAs?"
+        }
+      ]
+    }
+  ]}'
+```
+
+
+
+
+### Disable Auto-add
+
+If you want to disable the auto-add of `#transform=inline` to image URLs, set `disable_add_transform_inline_image_block` to `true`.
+
+
+
+
+```python
+import litellm
+
+litellm.disable_add_transform_inline_image_block = True
+```
+
+
+
+
+```yaml
+litellm_settings:
+  disable_add_transform_inline_image_block: true
+```
+
+
+
+
 ## Supported Models - ALL Fireworks AI Models Supported!
 
 :::info
diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md
index 5559592407..ea5d104a71 100644
--- a/docs/my-website/docs/proxy/config_settings.md
+++ b/docs/my-website/docs/proxy/config_settings.md
@@ -138,6 +138,7 @@ general_settings:
 | disable_end_user_cost_tracking | boolean | If true, turns off end user cost tracking on prometheus metrics + litellm spend logs table on proxy. |
 | disable_end_user_cost_tracking_prometheus_only | boolean | If true, turns off end user cost tracking on prometheus metrics only. |
 | key_generation_settings | object | Restricts who can generate keys. 
[Further docs](./virtual_keys.md#restricting-key-generation) | +| disable_add_transform_inline_image_block | boolean | For Fireworks AI models - if true, turns off the auto-add of `#transform=inline` to the url of the image_url, if the model is not a vision model. | ### general_settings - Reference diff --git a/docs/my-website/docs/proxy/model_access.md b/docs/my-website/docs/proxy/model_access.md new file mode 100644 index 0000000000..545d74865b --- /dev/null +++ b/docs/my-website/docs/proxy/model_access.md @@ -0,0 +1,346 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Control Model Access + +## **Restrict models by Virtual Key** + +Set allowed models for a key using the `models` param + + +```shell +curl 'http://0.0.0.0:4000/key/generate' \ +--header 'Authorization: Bearer ' \ +--header 'Content-Type: application/json' \ +--data-raw '{"models": ["gpt-3.5-turbo", "gpt-4"]}' +``` + +:::info + +This key can only make requests to `models` that are `gpt-3.5-turbo` or `gpt-4` + +::: + +Verify this is set correctly by + + + + +```shell +curl -i http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "gpt-4", + "messages": [ + {"role": "user", "content": "Hello"} + ] + }' +``` + + + + + +:::info + +Expect this to fail since gpt-4o is not in the `models` for the key generated + +::: + +```shell +curl -i http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Hello"} + ] + }' +``` + + + + + + +### [API Reference](https://litellm-api.up.railway.app/#/key%20management/generate_key_fn_key_generate_post) + +## **Restrict models by `team_id`** +`litellm-dev` can only access `azure-gpt-3.5` + +**1. Create a team via `/team/new`** +```shell +curl --location 'http://localhost:4000/team/new' \ +--header 'Authorization: Bearer ' \ +--header 'Content-Type: application/json' \ +--data-raw '{ + "team_alias": "litellm-dev", + "models": ["azure-gpt-3.5"] +}' + +# returns {...,"team_id": "my-unique-id"} +``` + +**2. Create a key for team** +```shell +curl --location 'http://localhost:4000/key/generate' \ +--header 'Authorization: Bearer sk-1234' \ +--header 'Content-Type: application/json' \ +--data-raw '{"team_id": "my-unique-id"}' +``` + +**3. Test it** +```shell +curl --location 'http://0.0.0.0:4000/chat/completions' \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer sk-qo992IjKOC2CHKZGRoJIGA' \ + --data '{ + "model": "BEDROCK_GROUP", + "messages": [ + { + "role": "user", + "content": "hi" + } + ] + }' +``` + +```shell +{"error":{"message":"Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n\nTraceback (most recent call last):\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/proxy_server.py\", line 2298, in chat_completion\n _is_valid_team_configs(\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/utils.py\", line 1296, in _is_valid_team_configs\n raise Exception(\nException: Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n","type":"None","param":"None","code":500}}% +``` + +### [API Reference](https://litellm-api.up.railway.app/#/team%20management/new_team_team_new_post) + + +## **Model Access Groups** + +Use model access groups to give users access to select models, and add new ones to it over time (e.g. 
mistral, llama-2, etc.) + +**Step 1. Assign model, access group in config.yaml** + +```yaml +model_list: + - model_name: gpt-4 + litellm_params: + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ + model_info: + access_groups: ["beta-models"] # 👈 Model Access Group + - model_name: fireworks-llama-v3-70b-instruct + litellm_params: + model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct + api_key: "os.environ/FIREWORKS" + model_info: + access_groups: ["beta-models"] # 👈 Model Access Group +``` + + + + + +**Create key with access group** + +```bash +curl --location 'http://localhost:4000/key/generate' \ +-H 'Authorization: Bearer ' \ +-H 'Content-Type: application/json' \ +-d '{"models": ["beta-models"], # 👈 Model Access Group + "max_budget": 0,}' +``` + +Test Key + + + + +```shell +curl -i http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-" \ + -d '{ + "model": "gpt-4", + "messages": [ + {"role": "user", "content": "Hello"} + ] + }' +``` + + + + + +:::info + +Expect this to fail since gpt-4o is not in the `beta-models` access group + +::: + +```shell +curl -i http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-" \ + -d '{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Hello"} + ] + }' +``` + + + + + + + + + +Create Team + +```shell +curl --location 'http://localhost:4000/team/new' \ +-H 'Authorization: Bearer sk-' \ +-H 'Content-Type: application/json' \ +-d '{"models": ["beta-models"]}' +``` + +Create Key for Team + +```shell +curl --location 'http://0.0.0.0:4000/key/generate' \ +--header 'Authorization: Bearer sk-' \ +--header 'Content-Type: application/json' \ +--data '{"team_id": "0ac97648-c194-4c90-8cd6-40af7b0d2d2a"} +``` + + +Test Key + + + + +```shell +curl -i http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-" \ + -d '{ + "model": "gpt-4", + "messages": [ + {"role": "user", "content": "Hello"} + ] + }' +``` + + + + + +:::info + +Expect this to fail since gpt-4o is not in the `beta-models` access group + +::: + +```shell +curl -i http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-" \ + -d '{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Hello"} + ] + }' +``` + + + + + + + + + + +### ✨ Control Access on Wildcard Models + +Control access to all models with a specific prefix (e.g. `openai/*`). + +Use this to also give users access to all models, except for a few that you don't want them to use (e.g. `openai/o1-*`). + +:::info + +Setting model access groups on wildcard models is an Enterprise feature. + +See pricing [here](https://litellm.ai/#pricing) + +Get a trial key [here](https://litellm.ai/#trial) +::: + + +1. Setup config.yaml + + +```yaml +model_list: + - model_name: openai/* + litellm_params: + model: openai/* + api_key: os.environ/OPENAI_API_KEY + model_info: + access_groups: ["default-models"] + - model_name: openai/o1-* + litellm_params: + model: openai/o1-* + api_key: os.environ/OPENAI_API_KEY + model_info: + access_groups: ["restricted-models"] +``` + +2. Generate a key with access to `default-models` + +```bash +curl -L -X POST 'http://0.0.0.0:4000/key/generate' \ +-H 'Authorization: Bearer sk-1234' \ +-H 'Content-Type: application/json' \ +-d '{ + "models": ["default-models"], +}' +``` + +3. 
Test the key + + + + +```bash +curl -i http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-" \ + -d '{ + "model": "openai/gpt-4", + "messages": [ + {"role": "user", "content": "Hello"} + ] + }' +``` + + + +```bash +curl -i http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-" \ + -d '{ + "model": "openai/o1-mini", + "messages": [ + {"role": "user", "content": "Hello"} + ] + }' +``` + + + diff --git a/docs/my-website/docs/proxy/virtual_keys.md b/docs/my-website/docs/proxy/virtual_keys.md index 2107698f32..254b50bca3 100644 --- a/docs/my-website/docs/proxy/virtual_keys.md +++ b/docs/my-website/docs/proxy/virtual_keys.md @@ -224,272 +224,13 @@ Expected Response -## **Model Access** -### **Restrict models by Virtual Key** - -Set allowed models for a key using the `models` param - - -```shell -curl 'http://0.0.0.0:4000/key/generate' \ ---header 'Authorization: Bearer ' \ ---header 'Content-Type: application/json' \ ---data-raw '{"models": ["gpt-3.5-turbo", "gpt-4"]}' -``` - -:::info - -This key can only make requests to `models` that are `gpt-3.5-turbo` or `gpt-4` - -::: - -Verify this is set correctly by - - - - -```shell -curl -i http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-1234" \ - -d '{ - "model": "gpt-4", - "messages": [ - {"role": "user", "content": "Hello"} - ] - }' -``` - - - - - -:::info - -Expect this to fail since gpt-4o is not in the `models` for the key generated - -::: - -```shell -curl -i http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-1234" \ - -d '{ - "model": "gpt-4o", - "messages": [ - {"role": "user", "content": "Hello"} - ] - }' -``` - - - - - -### **Restrict models by `team_id`** -`litellm-dev` can only access `azure-gpt-3.5` - -**1. Create a team via `/team/new`** -```shell -curl --location 'http://localhost:4000/team/new' \ ---header 'Authorization: Bearer ' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "team_alias": "litellm-dev", - "models": ["azure-gpt-3.5"] -}' - -# returns {...,"team_id": "my-unique-id"} -``` - -**2. Create a key for team** -```shell -curl --location 'http://localhost:4000/key/generate' \ ---header 'Authorization: Bearer sk-1234' \ ---header 'Content-Type: application/json' \ ---data-raw '{"team_id": "my-unique-id"}' -``` - -**3. Test it** -```shell -curl --location 'http://0.0.0.0:4000/chat/completions' \ - --header 'Content-Type: application/json' \ - --header 'Authorization: Bearer sk-qo992IjKOC2CHKZGRoJIGA' \ - --data '{ - "model": "BEDROCK_GROUP", - "messages": [ - { - "role": "user", - "content": "hi" - } - ] - }' -``` - -```shell -{"error":{"message":"Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n\nTraceback (most recent call last):\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/proxy_server.py\", line 2298, in chat_completion\n _is_valid_team_configs(\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/utils.py\", line 1296, in _is_valid_team_configs\n raise Exception(\nException: Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n","type":"None","param":"None","code":500}}% -``` - -### **Grant Access to new model (Access Groups)** - -Use model access groups to give users access to select models, and add new ones to it over time (e.g. 
mistral, llama-2, etc.) - -**Step 1. Assign model, access group in config.yaml** - -```yaml -model_list: - - model_name: gpt-4 - litellm_params: - model: openai/fake - api_key: fake-key - api_base: https://exampleopenaiendpoint-production.up.railway.app/ - model_info: - access_groups: ["beta-models"] # 👈 Model Access Group - - model_name: fireworks-llama-v3-70b-instruct - litellm_params: - model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct - api_key: "os.environ/FIREWORKS" - model_info: - access_groups: ["beta-models"] # 👈 Model Access Group -``` - - - - - -**Create key with access group** - -```bash -curl --location 'http://localhost:4000/key/generate' \ --H 'Authorization: Bearer ' \ --H 'Content-Type: application/json' \ --d '{"models": ["beta-models"], # 👈 Model Access Group - "max_budget": 0,}' -``` - -Test Key - - - - -```shell -curl -i http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-" \ - -d '{ - "model": "gpt-4", - "messages": [ - {"role": "user", "content": "Hello"} - ] - }' -``` - - - - - -:::info - -Expect this to fail since gpt-4o is not in the `beta-models` access group - -::: - -```shell -curl -i http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-" \ - -d '{ - "model": "gpt-4o", - "messages": [ - {"role": "user", "content": "Hello"} - ] - }' -``` - - - - - - - - - -Create Team - -```shell -curl --location 'http://localhost:4000/team/new' \ --H 'Authorization: Bearer sk-' \ --H 'Content-Type: application/json' \ --d '{"models": ["beta-models"]}' -``` - -Create Key for Team - -```shell -curl --location 'http://0.0.0.0:4000/key/generate' \ ---header 'Authorization: Bearer sk-' \ ---header 'Content-Type: application/json' \ ---data '{"team_id": "0ac97648-c194-4c90-8cd6-40af7b0d2d2a"} -``` - - -Test Key - - - - -```shell -curl -i http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-" \ - -d '{ - "model": "gpt-4", - "messages": [ - {"role": "user", "content": "Hello"} - ] - }' -``` - - - - - -:::info - -Expect this to fail since gpt-4o is not in the `beta-models` access group - -::: - -```shell -curl -i http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-" \ - -d '{ - "model": "gpt-4o", - "messages": [ - {"role": "user", "content": "Hello"} - ] - }' -``` - - - - - - - - - - -### Model Aliases +## Model Aliases If a user is expected to use a given model (i.e. gpt3-5), and you want to: - try to upgrade the request (i.e. GPT4) - or downgrade it (i.e. Mistral) -- OR rotate the API KEY (i.e. open AI) -- OR access the same model through different end points (i.e. 
openAI vs openrouter vs Azure) Here's how you can do that: @@ -509,13 +250,13 @@ model_list: litellm_params: model: huggingface/HuggingFaceH4/zephyr-7b-beta api_base: http://0.0.0.0:8003 - - model_name: my-paid-tier + - model_name: my-paid-tier litellm_params: model: gpt-4 api_key: my-api-key ``` -**Step 2: Generate a user key - enabling them access to specific models, custom model aliases, etc.** +**Step 2: Generate a key** ```bash curl -X POST "https://0.0.0.0:4000/key/generate" \ @@ -523,13 +264,29 @@ curl -X POST "https://0.0.0.0:4000/key/generate" \ -H "Content-Type: application/json" \ -d '{ "models": ["my-free-tier"], - "aliases": {"gpt-3.5-turbo": "my-free-tier"}, + "aliases": {"gpt-3.5-turbo": "my-free-tier"}, # 👈 KEY CHANGE "duration": "30min" }' ``` - **How to upgrade / downgrade request?** Change the alias mapping -- **How are routing between diff keys/api bases done?** litellm handles this by shuffling between different models in the model list with the same model_name. [**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/router.py) + +**Step 3: Test the key** + +```bash +curl -X POST "https://0.0.0.0:4000/key/generate" \ +-H "Authorization: Bearer " \ +-H "Content-Type: application/json" \ +-d '{ + "model": "gpt-3.5-turbo", + "messages": [ + { + "role": "user", + "content": "this is a test request, write a short poem" + } + ] +}' +``` ## Advanced diff --git a/docs/my-website/docs/wildcard_routing.md b/docs/my-website/docs/wildcard_routing.md index 80926d73e5..5cb5b8d9b9 100644 --- a/docs/my-website/docs/wildcard_routing.md +++ b/docs/my-website/docs/wildcard_routing.md @@ -138,3 +138,6 @@ curl http://localhost:4000/v1/chat/completions \ + + +## [[PROXY-Only] Control Wildcard Model Access](./proxy/model_access#-control-access-on-wildcard-models) \ No newline at end of file diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index bbf951b791..8f237f05b6 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -81,6 +81,14 @@ const sidebars = { "proxy/multiple_admins", ], }, + { + type: "category", + label: "Model Access", + items: [ + "proxy/model_access", + "proxy/team_model_add" + ] + }, { type: "category", label: "Admin UI", @@ -91,13 +99,6 @@ const sidebars = { "proxy/custom_sso" ], }, - { - type: "category", - label: "Team Management", - items: [ - "proxy/team_model_add" - ], - }, { type: "category", label: "Spend Tracking", diff --git a/litellm/__init__.py b/litellm/__init__.py index 48d5172e41..212f1514c3 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -151,6 +151,7 @@ use_client: bool = False ssl_verify: Union[str, bool] = True ssl_certificate: Optional[str] = None disable_streaming_logging: bool = False +disable_add_transform_inline_image_block: bool = False in_memory_llm_clients_cache: InMemoryCache = InMemoryCache() safe_memory_mode: bool = False enable_azure_ad_token_refresh: Optional[bool] = False diff --git a/litellm/llms/base_llm/base_utils.py b/litellm/llms/base_llm/base_utils.py new file mode 100644 index 0000000000..dca8c2504c --- /dev/null +++ b/litellm/llms/base_llm/base_utils.py @@ -0,0 +1,9 @@ +from abc import ABC, abstractmethod + +from litellm.types.utils import ModelInfoBase + + +class BaseLLMModelInfo(ABC): + @abstractmethod + def get_model_info(self, model: str) -> ModelInfoBase: + pass diff --git a/litellm/llms/fireworks_ai/chat/transformation.py b/litellm/llms/fireworks_ai/chat/transformation.py index 4753cbc00e..0879d2579f 100644 --- 
a/litellm/llms/fireworks_ai/chat/transformation.py +++ b/litellm/llms/fireworks_ai/chat/transformation.py @@ -1,12 +1,15 @@ -from typing import List, Literal, Optional, Tuple, Union +from typing import List, Literal, Optional, Tuple, Union, cast +import litellm +from litellm.llms.base_llm.base_utils import BaseLLMModelInfo from litellm.secret_managers.main import get_secret_str -from litellm.types.llms.openai import AllMessageValues +from litellm.types.llms.openai import AllMessageValues, ChatCompletionImageObject +from litellm.types.utils import ModelInfoBase, ProviderSpecificModelInfo from ...openai.chat.gpt_transformation import OpenAIGPTConfig -class FireworksAIConfig(OpenAIGPTConfig): +class FireworksAIConfig(BaseLLMModelInfo, OpenAIGPTConfig): """ Reference: https://docs.fireworks.ai/api-reference/post-chatcompletions @@ -110,6 +113,80 @@ class FireworksAIConfig(OpenAIGPTConfig): optional_params[param] = value return optional_params + def _add_transform_inline_image_block( + self, + content: ChatCompletionImageObject, + model: str, + disable_add_transform_inline_image_block: Optional[bool], + ) -> ChatCompletionImageObject: + """ + Add transform_inline to the image_url (allows non-vision models to parse documents/images/etc.) + - ignore if model is a vision model + - ignore if user has disabled this feature + """ + if ( + "vision" in model or disable_add_transform_inline_image_block + ): # allow user to toggle this feature. + return content + if isinstance(content["image_url"], str): + content["image_url"] = f"{content['image_url']}#transform=inline" + elif isinstance(content["image_url"], dict): + content["image_url"][ + "url" + ] = f"{content['image_url']['url']}#transform=inline" + return content + + def _transform_messages_helper( + self, messages: List[AllMessageValues], model: str, litellm_params: dict + ) -> List[AllMessageValues]: + """ + Add 'transform=inline' to the url of the image_url + """ + disable_add_transform_inline_image_block = cast( + Optional[bool], + litellm_params.get( + "disable_add_transform_inline_image_block", + litellm.disable_add_transform_inline_image_block, + ), + ) + for message in messages: + if message["role"] == "user": + _message_content = message.get("content") + if _message_content is not None and isinstance(_message_content, list): + for content in _message_content: + if content["type"] == "image_url": + content = self._add_transform_inline_image_block( + content=content, + model=model, + disable_add_transform_inline_image_block=disable_add_transform_inline_image_block, + ) + return messages + + def get_model_info( + self, model: str, existing_model_info: Optional[ModelInfoBase] = None + ) -> ModelInfoBase: + provider_specific_model_info = ProviderSpecificModelInfo( + supports_function_calling=True, + supports_prompt_caching=True, # https://docs.fireworks.ai/guides/prompt-caching + supports_pdf_input=True, # via document inlining + supports_vision=True, # via document inlining + ) + if existing_model_info is not None: + return ModelInfoBase( + **{**existing_model_info, **provider_specific_model_info} + ) + return ModelInfoBase( + key=model, + litellm_provider="fireworks_ai", + mode="chat", + input_cost_per_token=0.0, + output_cost_per_token=0.0, + max_tokens=None, + max_input_tokens=None, + max_output_tokens=None, + **provider_specific_model_info, + ) + def transform_request( self, model: str, @@ -120,6 +197,9 @@ class FireworksAIConfig(OpenAIGPTConfig): ) -> dict: if not model.startswith("accounts/"): model = 
f"accounts/fireworks/models/{model}" + messages = self._transform_messages_helper( + messages=messages, model=model, litellm_params=litellm_params + ) return super().transform_request( model=model, messages=messages, diff --git a/litellm/main.py b/litellm/main.py index e36774b2b7..928fc47d9e 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -899,6 +899,10 @@ def completion( # type: ignore # noqa: PLR0915 hf_model_name = kwargs.get("hf_model_name", None) supports_system_message = kwargs.get("supports_system_message", None) base_model = kwargs.get("base_model", None) + ### DISABLE FLAGS ### + disable_add_transform_inline_image_block = kwargs.get( + "disable_add_transform_inline_image_block", None + ) ### TEXT COMPLETION CALLS ### text_completion = kwargs.get("text_completion", False) atext_completion = kwargs.get("atext_completion", False) @@ -956,14 +960,11 @@ def completion( # type: ignore # noqa: PLR0915 "top_logprobs", "extra_headers", ] - default_params = openai_params + all_litellm_params - litellm_params = {} # used to prevent unbound var errors non_default_params = { k: v for k, v in kwargs.items() if k not in default_params } # model-specific params - pass them straight to the model/provider - ## PROMPT MANAGEMENT HOOKS ## if isinstance(litellm_logging_obj, LiteLLMLoggingObj) and prompt_id is not None: @@ -1156,6 +1157,7 @@ def completion( # type: ignore # noqa: PLR0915 hf_model_name=hf_model_name, custom_prompt_dict=custom_prompt_dict, litellm_metadata=kwargs.get("litellm_metadata"), + disable_add_transform_inline_image_block=disable_add_transform_inline_image_block, ) logging.update_environment_variables( model=model, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 9c37d84f8a..86d7f72f84 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,17 +1,13 @@ model_list: - - model_name: model-test + - model_name: openai/* litellm_params: - model: openai/gpt-3.5-turbo + model: openai/* api_key: os.environ/OPENAI_API_KEY - mock_response: "Hello, world!" - rpm: 1 - - model_name: model-test + model_info: + access_groups: ["default-models"] + - model_name: openai/o1-* litellm_params: - model: openai/o1-mini + model: openai/o1-* api_key: os.environ/OPENAI_API_KEY - mock_response: "Hello, world, it's o1!" 
- rpm: 10 - -router_settings: - routing_strategy: usage-based-routing-v2 - disable_cooldowns: True + model_info: + access_groups: ["restricted-models"] \ No newline at end of file diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py index 5550309069..580f23687a 100644 --- a/litellm/proxy/management_endpoints/key_management_endpoints.py +++ b/litellm/proxy/management_endpoints/key_management_endpoints.py @@ -16,7 +16,7 @@ import secrets import traceback import uuid from datetime import datetime, timedelta, timezone -from typing import List, Optional, Tuple, cast +from typing import List, Literal, Optional, Tuple, cast import fastapi from fastapi import APIRouter, Depends, Header, HTTPException, Query, Request, status @@ -38,6 +38,7 @@ from litellm.proxy.utils import ( duration_in_seconds, handle_exception_on_proxy, ) +from litellm.router import Router from litellm.secret_managers.main import get_secret from litellm.types.utils import ( BudgetConfig, @@ -330,6 +331,8 @@ async def generate_key_fn( # noqa: PLR0915 try: from litellm.proxy.proxy_server import ( litellm_proxy_admin_name, + llm_router, + premium_user, prisma_client, user_api_key_cache, user_custom_key_generate, @@ -386,6 +389,12 @@ async def generate_key_fn( # noqa: PLR0915 detail=str(e), ) + _check_model_access_group( + models=data.models, + llm_router=llm_router, + premium_user=premium_user, + ) + # check if user set default key/generate params on config.yaml if litellm.default_key_generate_params is not None: for elem in data: @@ -992,6 +1001,34 @@ async def info_key_fn( raise handle_exception_on_proxy(e) +def _check_model_access_group( + models: Optional[List[str]], llm_router: Optional[Router], premium_user: bool +) -> Literal[True]: + """ + if is_model_access_group is True + is_wildcard_route is True, check if user is a premium user + + Return True if user is a premium user, False otherwise + """ + if models is None or llm_router is None: + return True + + for model in models: + if llm_router._is_model_access_group_for_wildcard_route( + model_access_group=model + ): + if not premium_user: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail={ + "error": "Setting a model access group on a wildcard model is only available for LiteLLM Enterprise users.{}".format( + CommonProxyErrors.not_premium_user.value + ) + }, + ) + + return True + + async def generate_key_helper_fn( # noqa: PLR0915 request_type: Literal[ "user", "key" diff --git a/litellm/router.py b/litellm/router.py index 065905503e..9657e89e58 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -4713,10 +4713,14 @@ class Router: return None def get_model_access_groups( - self, model_name: Optional[str] = None + self, model_name: Optional[str] = None, model_access_group: Optional[str] = None ) -> Dict[str, List[str]]: """ If model_name is provided, only return access groups for that model. + + Parameters: + - model_name: Optional[str] - the received model name from the user (can be a wildcard route). If set, will only return access groups for that model. + - model_access_group: Optional[str] - the received model access group from the user. If set, will only return models for that access group. 
""" from collections import defaultdict @@ -4726,11 +4730,39 @@ class Router: if model_list: for m in model_list: for group in m.get("model_info", {}).get("access_groups", []): - model_name = m["model_name"] - access_groups[group].append(model_name) + if model_access_group is not None: + if group == model_access_group: + model_name = m["model_name"] + access_groups[group].append(model_name) + else: + model_name = m["model_name"] + access_groups[group].append(model_name) return access_groups + def _is_model_access_group_for_wildcard_route( + self, model_access_group: str + ) -> bool: + """ + Return True if model access group is a wildcard route + """ + # GET ACCESS GROUPS + access_groups = self.get_model_access_groups( + model_access_group=model_access_group + ) + + if len(access_groups) == 0: + return False + + models = access_groups.get(model_access_group, []) + + for model in models: + # CHECK IF MODEL ACCESS GROUP IS A WILDCARD ROUTE + if self.pattern_router.route(request=model) is not None: + return True + + return False + def get_settings(self): """ Get router settings method, returns a dictionary of the settings and their values. diff --git a/litellm/router_utils/pattern_match_deployments.py b/litellm/router_utils/pattern_match_deployments.py index a0d590f23c..729510574a 100644 --- a/litellm/router_utils/pattern_match_deployments.py +++ b/litellm/router_utils/pattern_match_deployments.py @@ -128,7 +128,7 @@ class PatternMatchRouter: if no pattern is found, return None Args: - request: Optional[str] + request: str - the received model name from the user (can be a wildcard route). If none, No deployments will be returned. filtered_model_names: Optional[List[str]] - if provided, only return deployments that match the filtered_model_names Returns: Optional[List[Deployment]]: llm deployments diff --git a/litellm/types/utils.py b/litellm/types/utils.py index ed9f2ee98c..d8b4bf282f 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -75,7 +75,20 @@ class ProviderField(TypedDict): field_value: str -class ModelInfoBase(TypedDict, total=False): +class ProviderSpecificModelInfo(TypedDict, total=False): + supports_system_messages: Optional[bool] + supports_response_schema: Optional[bool] + supports_vision: Optional[bool] + supports_function_calling: Optional[bool] + supports_assistant_prefill: Optional[bool] + supports_prompt_caching: Optional[bool] + supports_audio_input: Optional[bool] + supports_embedding_image_input: Optional[bool] + supports_audio_output: Optional[bool] + supports_pdf_input: Optional[bool] + + +class ModelInfoBase(ProviderSpecificModelInfo, total=False): key: Required[str] # the key in litellm.model_cost which is returned max_tokens: Required[Optional[int]] @@ -116,16 +129,6 @@ class ModelInfoBase(TypedDict, total=False): "completion", "embedding", "image_generation", "chat", "audio_transcription" ] ] - supports_system_messages: Optional[bool] - supports_response_schema: Optional[bool] - supports_vision: Optional[bool] - supports_function_calling: Optional[bool] - supports_assistant_prefill: Optional[bool] - supports_prompt_caching: Optional[bool] - supports_audio_input: Optional[bool] - supports_embedding_image_input: Optional[bool] - supports_audio_output: Optional[bool] - supports_pdf_input: Optional[bool] tpm: Optional[int] rpm: Optional[int] @@ -1613,6 +1616,7 @@ all_litellm_params = [ "caching", "mock_response", "mock_timeout", + "disable_add_transform_inline_image_block", "api_key", "api_version", "prompt_id", diff --git a/litellm/utils.py 
b/litellm/utils.py index 6decadc719..37e50b55ad 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -174,6 +174,7 @@ from openai import OpenAIError as OriginalError from litellm.llms.base_llm.audio_transcription.transformation import ( BaseAudioTranscriptionConfig, ) +from litellm.llms.base_llm.base_utils import BaseLLMModelInfo from litellm.llms.base_llm.chat.transformation import BaseConfig from litellm.llms.base_llm.completion.transformation import BaseTextCompletionConfig from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig @@ -1989,6 +1990,7 @@ def get_litellm_params( hf_model_name: Optional[str] = None, custom_prompt_dict: Optional[dict] = None, litellm_metadata: Optional[dict] = None, + disable_add_transform_inline_image_block: Optional[bool] = None, ): litellm_params = { "acompletion": acompletion, @@ -2021,6 +2023,7 @@ def get_litellm_params( "hf_model_name": hf_model_name, "custom_prompt_dict": custom_prompt_dict, "litellm_metadata": litellm_metadata, + "disable_add_transform_inline_image_block": disable_add_transform_inline_image_block, } return litellm_params @@ -4373,6 +4376,17 @@ def _get_model_info_helper( # noqa: PLR0915 model_info=_model_info, custom_llm_provider=custom_llm_provider ): _model_info = None + if _model_info is None and ProviderConfigManager.get_provider_model_info( + model=model, provider=LlmProviders(custom_llm_provider) + ): + provider_config = ProviderConfigManager.get_provider_model_info( + model=model, provider=LlmProviders(custom_llm_provider) + ) + if provider_config is not None: + _model_info = cast( + dict, provider_config.get_model_info(model=model) + ) + key = "provider_specific_model_info" if _model_info is None or key is None: raise ValueError( "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json" @@ -6338,6 +6352,15 @@ class ProviderConfigManager: return litellm.TogetherAITextCompletionConfig() return litellm.OpenAITextCompletionConfig() + @staticmethod + def get_provider_model_info( + model: str, + provider: LlmProviders, + ) -> Optional[BaseLLMModelInfo]: + if LlmProviders.FIREWORKS_AI == provider: + return litellm.FireworksAIConfig() + return None + def get_end_user_id_for_cost_tracking( litellm_params: dict, diff --git a/tests/llm_translation/test_fireworks_ai_translation.py b/tests/llm_translation/test_fireworks_ai_translation.py index f91402a86a..f38efd96dc 100644 --- a/tests/llm_translation/test_fireworks_ai_translation.py +++ b/tests/llm_translation/test_fireworks_ai_translation.py @@ -103,3 +103,96 @@ class TestFireworksAIAudioTranscription(BaseLLMAudioTranscriptionTest): def get_custom_llm_provider(self) -> litellm.LlmProviders: return litellm.LlmProviders.FIREWORKS_AI + + +@pytest.mark.parametrize( + "disable_add_transform_inline_image_block", + [True, False], +) +def test_document_inlining_example(disable_add_transform_inline_image_block): + litellm.set_verbose = True + if disable_add_transform_inline_image_block is True: + with pytest.raises(Exception): + completion = litellm.completion( + model="fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct", + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": "https://storage.googleapis.com/fireworks-public/test/sample_resume.pdf" + }, + }, + { + "type": "text", + "text": "What are the candidate's BA and MBA GPAs?", + }, + ], + } + ], + disable_add_transform_inline_image_block=disable_add_transform_inline_image_block, + ) + else: 
+ completion = litellm.completion( + model="fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct", + messages=[ + { + "role": "user", + "content": "this is a test request, write a short poem", + }, + ], + disable_add_transform_inline_image_block=disable_add_transform_inline_image_block, + ) + print(completion) + + +@pytest.mark.parametrize( + "content, model, expected_url", + [ + ( + {"image_url": "http://example.com/image.png"}, + "gpt-4", + "http://example.com/image.png#transform=inline", + ), + ( + {"image_url": {"url": "http://example.com/image.png"}}, + "gpt-4", + {"url": "http://example.com/image.png#transform=inline"}, + ), + ( + {"image_url": "http://example.com/image.png"}, + "vision-gpt", + "http://example.com/image.png", + ), + ], +) +def test_transform_inline(content, model, expected_url): + + result = litellm.FireworksAIConfig()._add_transform_inline_image_block( + content=content, model=model, disable_add_transform_inline_image_block=False + ) + if isinstance(expected_url, str): + assert result["image_url"] == expected_url + else: + assert result["image_url"]["url"] == expected_url["url"] + + +@pytest.mark.parametrize( + "model, is_disabled, expected_url", + [ + ("gpt-4", True, "http://example.com/image.png"), + ("vision-gpt", False, "http://example.com/image.png"), + ("gpt-4", False, "http://example.com/image.png#transform=inline"), + ], +) +def test_global_disable_flag(model, is_disabled, expected_url): + content = {"image_url": "http://example.com/image.png"} + result = litellm.FireworksAIConfig()._add_transform_inline_image_block( + content=content, + model=model, + disable_add_transform_inline_image_block=is_disabled, + ) + assert result["image_url"] == expected_url + litellm.disable_add_transform_inline_image_block = False # Reset for other tests diff --git a/tests/local_testing/test_router_utils.py b/tests/local_testing/test_router_utils.py index 706fdc55f4..fa1c6f5f9e 100644 --- a/tests/local_testing/test_router_utils.py +++ b/tests/local_testing/test_router_utils.py @@ -364,3 +364,23 @@ async def test_get_remaining_model_group_usage(): assert remaining_usage is not None assert "x-ratelimit-remaining-requests" in remaining_usage assert "x-ratelimit-remaining-tokens" in remaining_usage + + +@pytest.mark.parametrize( + "potential_access_group, expected_result", + [("gemini-models", True), ("gemini-models-2", False), ("gemini/*", False)], +) +def test_router_get_model_access_groups(potential_access_group, expected_result): + router = Router( + model_list=[ + { + "model_name": "gemini/*", + "litellm_params": {"model": "gemini/*"}, + "model_info": {"id": 1, "access_groups": ["gemini-models"]}, + }, + ] + ) + access_groups = router._is_model_access_group_for_wildcard_route( + model_access_group=potential_access_group + ) + assert access_groups == expected_result diff --git a/tests/local_testing/test_utils.py b/tests/local_testing/test_utils.py index 7240d0dba4..28cff52e95 100644 --- a/tests/local_testing/test_utils.py +++ b/tests/local_testing/test_utils.py @@ -1240,3 +1240,15 @@ def test_token_counter_with_image_url_with_detail_high(): ) print("tokens", _tokens) assert _tokens == DEFAULT_IMAGE_TOKEN_COUNT + 7 + + +def test_fireworks_ai_document_inlining(): + """ + With document inlining, all fireworks ai models are now: + - supports_pdf + - supports_vision + """ + from litellm.utils import supports_pdf_input, supports_vision + + assert supports_pdf_input("fireworks_ai/llama-3.1-8b-instruct") is True + assert supports_vision("fireworks_ai/llama-3.1-8b-instruct") 
is True
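
The same toggle is also exposed per request: `completion()` reads `disable_add_transform_inline_image_block` from kwargs and threads it through `get_litellm_params` (see the `main.py` and `utils.py` hunks above), and the new Fireworks tests pass it directly. A minimal SDK sketch of that per-request usage, assuming a valid `FIREWORKS_AI_API_KEY` (model name and prompt are placeholders):

```python
import os
import litellm

os.environ["FIREWORKS_AI_API_KEY"] = "YOUR_API_KEY"

# Opt out of the "#transform=inline" URL rewrite for this request only;
# the global litellm.disable_add_transform_inline_image_block flag stays False.
response = litellm.completion(
    model="fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct",
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
    disable_add_transform_inline_image_block=True,
)
print(response)
```

This lets a caller disable document inlining for a single call without flipping the global flag or the proxy's `litellm_settings` value.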