Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 03:34:10 +00:00)
fix(health.md): add rerank model health check information (#7295)
* fix(health.md): add rerank model health check information
* build(model_prices_and_context_window.json): add gemini 2.0 for google ai studio - pricing + commercial rate limits
* build(model_prices_and_context_window.json): add gemini-2.0 supports audio output = true
* docs(team_model_add.md): clarify allowing teams to add models is an enterprise feature
* fix(o1_transformation.py): add support for 'n', 'response_format' and 'stop' params for o1 and 'stream_options' param for o1-mini
* build(model_prices_and_context_window.json): add 'supports_system_message' to supporting openai models; needed as o1-preview and o1-mini models don't support 'system' messages
* fix(o1_transformation.py): translate the system message based on whether the o1 model supports it
* fix(o1_transformation.py): return 'stream' param support for o1-mini/o1-preview; o1 currently doesn't support streaming, but the other model versions do. Fixes https://github.com/BerriAI/litellm/issues/7292
* fix(o1_transformation.py): return tool calling/response_format in supported params if the model map says so. Fixes https://github.com/BerriAI/litellm/issues/7292
* fix: fix linting errors
* fix: update '_transform_messages'
* fix(o1_transformation.py): fix provider passed for supported param checks
* test(base_llm_unit_tests.py): skip test if api takes >5s to respond
* fix(utils.py): return false in 'supports_factory' if the value can't be found
* fix(o1_transformation.py): always return stream + stream_options as supported params + handle stream options being passed in for azure o1
* feat(openai.py): support stream faking natively in the openai handler; allows o1 calls to be faked for just the "o1" model and native streaming for o1-mini / o1-preview. Fixes https://github.com/BerriAI/litellm/issues/7292
* fix(openai.py): use inference param instead of original optional param
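For orientation, a minimal sketch of the calling pattern these o1 changes affect (the exact behaviour — native streaming for o1-mini/o1-preview, faked streaming for the base "o1" model, system-message translation — is handled inside LiteLLM and depends on the bundled model map):

```python
import litellm

# o1-mini / o1-preview stream natively; for "o1" the OpenAI handler now fakes
# streaming by wrapping the full response in a mock stream (per this commit).
response = litellm.completion(
    model="o1-mini",
    messages=[
        # system messages are translated to user messages for o1 models that
        # don't advertise 'supports_system_messages' in the model map
        {"role": "system", "content": "You are a terse assistant."},
        {"role": "user", "content": "Say hello."},
    ],
    stream=True,
    stream_options={"include_usage": True},
)
for chunk in response:
    print(chunk)
```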
This commit is contained in: parent e95820367f, commit 1a4910f6c0
34 changed files with 800 additions and 515 deletions
@@ -121,6 +121,20 @@ model_list:
       mode: audio_speech
 ```
 
+### Rerank Models
+
+To run rerank health checks, specify the mode as "rerank" in your config for the relevant model.
+
+```yaml
+model_list:
+  - model_name: rerank-english-v3.0
+    litellm_params:
+      model: cohere/rerank-english-v3.0
+      api_key: os.environ/COHERE_API_KEY
+    model_info:
+      mode: rerank
+```
+
 ### Batch Models (Azure Only)
 
 For Azure models deployed as 'batch' models, set `mode: batch`.
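Once a rerank model is configured this way, the proxy's `/health` endpoint checks it alongside the other deployments. A minimal sketch of querying it (the proxy URL and master key below are placeholders):

```python
import requests

# assumes a LiteLLM proxy is running locally with the config above
resp = requests.get(
    "http://0.0.0.0:4000/health",
    headers={"Authorization": "Bearer sk-1234"},  # placeholder master key
)
print(resp.json())  # lists healthy / unhealthy endpoints, including the rerank model
```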
@@ -1,4 +1,13 @@
-# Allow Teams to Add Models
+# ✨ Allow Teams to Add Models
+
+:::info
+
+This is an Enterprise feature.
+
+[Enterprise Pricing](https://www.litellm.ai/#pricing)
+
+[Contact us here to get a free trial](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
+
+:::
 
 Allow team to add a their own models/key for that project - so any OpenAI call they make uses their OpenAI key.
@@ -3144,7 +3144,9 @@ def prompt_factory(
         else:
             return gemini_text_image_pt(messages=messages)
     elif custom_llm_provider == "mistral":
-        return litellm.MistralConfig()._transform_messages(messages=messages)
+        return litellm.MistralConfig()._transform_messages(
+            messages=messages, model=model
+        )
     elif custom_llm_provider == "bedrock":
         if "amazon.titan-text" in model:
             return amazon_titan_pt(messages=messages)
@@ -260,12 +260,6 @@ class AnthropicTextConfig(BaseConfig):
 
         return str(prompt)
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        "Not required"
-        raise NotImplementedError
-
     def get_model_response_iterator(
         self,
         streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
@@ -57,6 +57,7 @@ class AzureOpenAIO1ChatCompletion(AzureChatCompletion):
         client=None,
     ):
         stream: Optional[bool] = optional_params.pop("stream", False)
+        stream_options: Optional[dict] = optional_params.pop("stream_options", None)
         response = super().completion(
             model,
             messages,
@@ -90,6 +91,7 @@ class AzureOpenAIO1ChatCompletion(AzureChatCompletion):
                 model=model,
                 custom_llm_provider="openai",
                 logging_obj=logging_obj,
+                stream_options=stream_options,
             )
 
             return streaming_response
@@ -2,11 +2,11 @@ from typing import List, Optional, Tuple
 
 import litellm
 from litellm._logging import verbose_logger
-from litellm.llms.openai.openai import OpenAIConfig
 from litellm.litellm_core_utils.prompt_templates.common_utils import (
     _audio_or_image_in_message_content,
     convert_content_list_to_str,
 )
+from litellm.llms.openai.openai import OpenAIConfig
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import ProviderField
@@ -33,6 +33,7 @@ class AzureAIStudioConfig(OpenAIConfig):
     def _transform_messages(
         self,
         messages: List[AllMessageValues],
+        model: str,
     ) -> List:
         """
         - Azure AI Studio doesn't support content as a list. This handles:
@@ -82,6 +82,14 @@ class BaseConfig(ABC):
             and v is not None
         }
 
+    def should_fake_stream(
+        self, model: str, custom_llm_provider: Optional[str] = None
+    ) -> bool:
+        """
+        Returns True if the model/provider should fake stream
+        """
+        return False
+
     @abstractmethod
     def get_supported_openai_params(self, model: str) -> list:
         pass
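A minimal sketch of how a provider config is expected to use this new hook; the class below is an illustrative stand-in, not a config that exists in the codebase:

```python
from typing import Optional


class MyProviderChatConfig:  # illustrative stand-in for a BaseConfig subclass
    def should_fake_stream(
        self, model: str, custom_llm_provider: Optional[str] = None
    ) -> bool:
        # fake streaming only for models that cannot stream natively,
        # mirroring what OpenAIO1Config does for the base "o1" model below
        return "o1" in model and "o1-mini" not in model and "o1-preview" not in model
```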
@@ -131,11 +131,6 @@ class ClarifaiConfig(BaseConfig):
         headers["Authorization"] = f"Bearer {api_key}"
         return headers
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        raise NotImplementedError
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:
@@ -158,11 +158,6 @@ class CloudflareChatConfig(BaseConfig):
             message=error_message,
         )
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        raise NotImplementedError
-
     def get_model_response_iterator(
         self,
         streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
@@ -365,8 +365,3 @@ class CohereChatConfig(BaseConfig):
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:
         return CohereError(status_code=status_code, message=error_message)
-
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        raise NotImplementedError
@@ -121,12 +121,6 @@ class CohereTextConfig(BaseConfig):
             api_key=api_key,
         )
 
-    def _transform_messages(
-        self,
-        messages: List[AllMessageValues],
-    ) -> List[AllMessageValues]:
-        raise NotImplementedError
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:
@@ -2,11 +2,12 @@
 Handles the chat completion request for Databricks
 """
 
-from typing import Any, Callable, Literal, Optional, Tuple, Union
+from typing import Any, Callable, List, Literal, Optional, Tuple, Union, cast
 
 from httpx._config import Timeout
 
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import CustomStreamingDecoder
 from litellm.utils import ModelResponse
@@ -44,7 +45,9 @@ class DatabricksChatCompletion(OpenAILikeChatHandler, DatabricksBase):
         streaming_decoder: Optional[CustomStreamingDecoder] = None,
         fake_stream: bool = False,
     ):
-        messages = DatabricksConfig()._transform_messages(messages)  # type: ignore
+        messages = DatabricksConfig()._transform_messages(
+            messages=cast(List[AllMessageValues], messages), model=model
+        )
         api_base, headers = self.databricks_validate_environment(
             api_base=api_base,
             api_key=api_key,
@@ -7,14 +7,14 @@ from typing import List, Optional, Union
 
 from pydantic import BaseModel
 
-from litellm.types.llms.openai import AllMessageValues
-from litellm.types.utils import ProviderField
-
-from ...openai_like.chat.transformation import OpenAILikeChatConfig
 from litellm.litellm_core_utils.prompt_templates.common_utils import (
     handle_messages_with_content_list_to_str_conversion,
     strip_name_from_messages,
 )
+from litellm.types.llms.openai import AllMessageValues
+from litellm.types.utils import ProviderField
+
+from ...openai_like.chat.transformation import OpenAILikeChatConfig
 
 
 class DatabricksConfig(OpenAILikeChatConfig):
@@ -86,7 +86,7 @@ class DatabricksConfig(OpenAILikeChatConfig):
         return False
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         """
         Databricks does not support:
@@ -102,4 +102,4 @@ class DatabricksConfig(OpenAILikeChatConfig):
             new_messages.append(_message)
         new_messages = handle_messages_with_content_list_to_str_conversion(new_messages)
         new_messages = strip_name_from_messages(new_messages)
-        return super()._transform_messages(new_messages)
+        return super()._transform_messages(messages=new_messages, model=model)
@@ -8,26 +8,26 @@ from typing import List, Optional, Tuple, Union
 from pydantic import BaseModel
 
 import litellm
+from litellm.litellm_core_utils.prompt_templates.common_utils import (
+    handle_messages_with_content_list_to_str_conversion,
+)
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage
 
 from ....utils import _remove_additional_properties, _remove_strict_from_schema
 from ...openai.chat.gpt_transformation import OpenAIGPTConfig
-from litellm.litellm_core_utils.prompt_templates.common_utils import (
-    handle_messages_with_content_list_to_str_conversion,
-)
 
 
 class DeepSeekChatConfig(OpenAIGPTConfig):
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
    ) -> List[AllMessageValues]:
         """
         DeepSeek does not support content in list format.
         """
         messages = handle_messages_with_content_list_to_str_conversion(messages)
-        return super()._transform_messages(messages)
+        return super()._transform_messages(messages=messages, model=model)
 
     def _get_openai_compatible_provider_info(
         self, api_base: Optional[str], api_key: Optional[str]
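To make the DeepSeek/Databricks constraint concrete, a small illustration of what converting list-style content to a plain string means. LiteLLM does this internally via `handle_messages_with_content_list_to_str_conversion`; the plain-Python version below is only illustrative:

```python
messages = [
    {
        "role": "user",
        "content": [  # OpenAI-style content list, unsupported by these providers
            {"type": "text", "text": "Summarize this: "},
            {"type": "text", "text": "LiteLLM exposes many LLM APIs in the OpenAI format."},
        ],
    }
]


def flatten_content(msgs):
    # join the text parts of a content list into a single string per message
    out = []
    for m in msgs:
        content = m["content"]
        if isinstance(content, list):
            content = "".join(part.get("text", "") for part in content)
        out.append({**m, "content": content})
    return out


print(flatten_content(messages))
# [{'role': 'user', 'content': 'Summarize this: LiteLLM exposes many LLM APIs in the OpenAI format.'}]
```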
@@ -2,11 +2,12 @@
 Handles the chat completion request for groq
 """
 
-from typing import Any, Callable, Optional, Union
+from typing import Any, Callable, List, Optional, Union, cast
 
 from httpx._config import Timeout
 
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import CustomStreamingDecoder
 from litellm.utils import ModelResponse
@@ -42,7 +43,9 @@ class GroqChatCompletion(OpenAILikeChatHandler):
         streaming_decoder: Optional[CustomStreamingDecoder] = None,
         fake_stream: bool = False,
     ):
-        messages = GroqChatConfig()._transform_messages(messages)  # type: ignore
+        messages = GroqChatConfig()._transform_messages(
+            messages=cast(List[AllMessageValues], messages), model=model
+        )
 
         if optional_params.get("stream") is True:
             fake_stream = GroqChatConfig()._should_fake_stream(optional_params)
@@ -61,7 +61,7 @@ class GroqChatConfig(OpenAIGPTConfig):
     def get_config(cls):
         return super().get_config()
 
-    def _transform_messages(self, messages: List[AllMessageValues]) -> List:
+    def _transform_messages(self, messages: List[AllMessageValues], model: str) -> List:
         for idx, message in enumerate(messages):
             """
             1. Don't pass 'null' function_call assistant message to groq - https://github.com/BerriAI/litellm/issues/5839
@@ -369,12 +369,6 @@ class HuggingfaceChatConfig(BaseConfig):
             headers = {**headers, **default_headers}
         return headers
 
-    def _transform_messages(
-        self,
-        messages: List[AllMessageValues],
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:
@@ -9,11 +9,11 @@ Docs - https://docs.mistral.ai/api/
 import types
 from typing import List, Literal, Optional, Tuple, Union
 
-from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
 from litellm.litellm_core_utils.prompt_templates.common_utils import (
     handle_messages_with_content_list_to_str_conversion,
     strip_none_values_from_message,
 )
+from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import AllMessageValues
@@ -148,7 +148,7 @@ class MistralConfig(OpenAIGPTConfig):
         return api_base, dynamic_api_key
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         """
         - handles scenario where content is list and not string
@@ -23,6 +23,7 @@ from litellm.types.llms.openai import (
 from litellm.types.utils import (
     GenericStreamingChunk,
     ModelInfo,
+    ModelInfoBase,
     ModelResponse,
     ProviderField,
     StreamingChoices,
@@ -198,7 +199,7 @@ class OllamaConfig(BaseConfig):
                 return v
         return None
 
-    def get_model_info(self, model: str) -> ModelInfo:
+    def get_model_info(self, model: str) -> ModelInfoBase:
         """
         curl http://localhost:11434/api/show -d '{
           "name": "mistral"
@@ -222,11 +223,10 @@ class OllamaConfig(BaseConfig):
 
         _max_tokens: Optional[int] = self._get_max_tokens(model_info)
 
-        return ModelInfo(
+        return ModelInfoBase(
             key=model,
             litellm_provider="ollama",
             mode="chat",
-            supported_openai_params=self.get_supported_openai_params(model=model),
             supports_function_calling=self._supports_function_calling(model_info),
             input_cost_per_token=0.0,
             output_cost_per_token=0.0,
@@ -235,11 +235,6 @@ class OllamaConfig(BaseConfig):
             max_output_tokens=_max_tokens,
         )
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, Headers]
     ) -> BaseLLMException:
@@ -23,11 +23,6 @@ else:
 
 
 class OobaboogaConfig(OpenAIGPTConfig):
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self,
         error_message: str,
@@ -164,7 +164,7 @@ class OpenAIGPTConfig(BaseConfig):
         )
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         return messages
 
@@ -15,7 +15,14 @@ import types
 from typing import Any, List, Optional, Union
 
 import litellm
+from litellm import verbose_logger
+from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage
+from litellm.utils import (
+    supports_function_calling,
+    supports_response_schema,
+    supports_system_messages,
+)
 
 from .gpt_transformation import OpenAIGPTConfig
@@ -29,6 +36,15 @@ class OpenAIO1Config(OpenAIGPTConfig):
     def get_config(cls):
         return super().get_config()
 
+    def should_fake_stream(
+        self, model: str, custom_llm_provider: Optional[str] = None
+    ) -> bool:
+        supported_stream_models = ["o1-mini", "o1-preview"]
+        for supported_model in supported_stream_models:
+            if supported_model in model:
+                return False
+        return True
+
     def get_supported_openai_params(self, model: str) -> list:
         """
         Get the supported OpenAI params for the given model
@@ -38,21 +54,37 @@ class OpenAIO1Config(OpenAIGPTConfig):
         all_openai_params = super().get_supported_openai_params(model=model)
         non_supported_params = [
             "logprobs",
-            "tools",
-            "tool_choice",
-            "parallel_tool_calls",
-            "function_call",
-            "functions",
             "top_p",
-            "n",
             "presence_penalty",
             "frequency_penalty",
             "top_logprobs",
-            "response_format",
-            "stop",
-            "stream_options",
         ]
 
+        try:
+            model, custom_llm_provider, api_base, api_key = get_llm_provider(
+                model=model
+            )
+        except Exception:
+            verbose_logger.debug(
+                f"Unable to infer model provider for model={model}, defaulting to openai for o1 supported param check"
+            )
+            custom_llm_provider = "openai"
+
+        _supports_function_calling = supports_function_calling(
+            model, custom_llm_provider
+        )
+        _supports_response_schema = supports_response_schema(model, custom_llm_provider)
+
+        if not _supports_function_calling:
+            non_supported_params.append("tools")
+            non_supported_params.append("tool_choice")
+            non_supported_params.append("parallel_tool_calls")
+            non_supported_params.append("function_call")
+            non_supported_params.append("functions")
+
+        if not _supports_response_schema:
+            non_supported_params.append("response_format")
+
         return [
             param for param in all_openai_params if param not in non_supported_params
         ]
@@ -95,16 +127,16 @@ class OpenAIO1Config(OpenAIGPTConfig):
         return False
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         """
         Handles limitations of O-1 model family.
         - modalities: image => drop param (if user opts in to dropping param)
         - role: system ==> translate to role 'user'
         """
+        _supports_system_messages = supports_system_messages(model, "openai")
         for i, message in enumerate(messages):
-            if message["role"] == "system":
+            if message["role"] == "system" and not _supports_system_messages:
                 new_message = ChatCompletionUserMessage(
                     content=message["content"], role="user"
                 )
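The capability checks above come from `litellm.utils`. A small sketch of querying them directly; the printed values depend on the bundled model map, so treat the output as indicative only:

```python
from litellm.utils import (
    supports_function_calling,
    supports_response_schema,
    supports_system_messages,
)

# the same checks OpenAIO1Config now uses to decide which OpenAI params
# (tools, response_format, ...) to advertise and whether to rewrite
# system messages to user messages
for model in ("o1", "o1-mini", "o1-preview"):
    print(
        model,
        supports_function_calling(model, "openai"),
        supports_response_schema(model, "openai"),
        supports_system_messages(model, "openai"),
    )
```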
@@ -33,6 +33,7 @@ from litellm.litellm_core_utils.prompt_templates.factory import (
     prompt_factory,
 )
 from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
+from litellm.llms.bedrock.chat.invoke_handler import MockResponseIterator
 from litellm.llms.custom_httpx.http_handler import _DEFAULT_TTL_FOR_HTTPX_CLIENTS
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.utils import (
@@ -198,7 +199,7 @@ class OpenAIConfig(BaseConfig):
         return optional_params
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         return messages
 
@@ -410,6 +411,24 @@ class OpenAIChatCompletion(BaseLLM):
             else:
                 raise e
 
+    def mock_streaming(
+        self,
+        response: ModelResponse,
+        logging_obj: LiteLLMLoggingObj,
+        model: str,
+        stream_options: Optional[dict] = None,
+    ) -> CustomStreamWrapper:
+        completion_stream = MockResponseIterator(model_response=response)
+        streaming_response = CustomStreamWrapper(
+            completion_stream=completion_stream,
+            model=model,
+            custom_llm_provider="openai",
+            logging_obj=logging_obj,
+            stream_options=stream_options,
+        )
+
+        return streaming_response
+
     def completion(  # type: ignore # noqa: PLR0915
         self,
         model_response: ModelResponse,
@@ -433,8 +452,21 @@ class OpenAIChatCompletion(BaseLLM):
     ):
         super().completion()
         try:
+            fake_stream: bool = False
+            if custom_llm_provider is not None and model is not None:
+                provider_config = ProviderConfigManager.get_provider_chat_config(
+                    model=model, provider=LlmProviders(custom_llm_provider)
+                )
+                fake_stream = provider_config.should_fake_stream(
+                    model=model, custom_llm_provider=custom_llm_provider
+                )
+            inference_params = optional_params.copy()
+            stream_options: Optional[dict] = inference_params.pop(
+                "stream_options", None
+            )
+            stream: Optional[bool] = inference_params.pop("stream", False)
             if headers:
-                optional_params["extra_headers"] = headers
+                inference_params["extra_headers"] = headers
             if model is None or messages is None:
                 raise OpenAIError(status_code=422, message="Missing model or messages")
@@ -456,7 +488,9 @@ class OpenAIChatCompletion(BaseLLM):
             if isinstance(provider_config, OpenAIGPTConfig) or isinstance(
                 provider_config, OpenAIConfig
             ):
-                messages = provider_config._transform_messages(messages)
+                messages = provider_config._transform_messages(
+                    messages=messages, model=model
+                )
 
             for _ in range(
                 2
@@ -464,7 +498,7 @@ class OpenAIChatCompletion(BaseLLM):
                 data = OpenAIConfig().transform_request(
                     model=model,
                     messages=messages,
-                    optional_params=optional_params,
+                    optional_params=inference_params,
                     litellm_params=litellm_params,
                     headers=headers or {},
                 )
@@ -472,7 +506,7 @@ class OpenAIChatCompletion(BaseLLM):
                 try:
                     max_retries = data.pop("max_retries", 2)
                     if acompletion is True:
-                        if optional_params.get("stream", False):
+                        if stream is True and fake_stream is False:
                             return self.async_streaming(
                                 logging_obj=logging_obj,
                                 headers=headers,
@@ -485,11 +519,13 @@ class OpenAIChatCompletion(BaseLLM):
                                 max_retries=max_retries,
                                 organization=organization,
                                 drop_params=drop_params,
+                                stream_options=stream_options,
                             )
                         else:
                             return self.acompletion(
                                 data=data,
                                 headers=headers,
+                                model=model,
                                 logging_obj=logging_obj,
                                 model_response=model_response,
                                 api_base=api_base,
@@ -499,8 +535,9 @@ class OpenAIChatCompletion(BaseLLM):
                                 max_retries=max_retries,
                                 organization=organization,
                                 drop_params=drop_params,
+                                fake_stream=fake_stream,
                             )
-                    elif optional_params.get("stream", False):
+                    elif stream is True and fake_stream is False:
                         return self.streaming(
                             logging_obj=logging_obj,
                             headers=headers,
@@ -512,6 +549,7 @@ class OpenAIChatCompletion(BaseLLM):
                             client=client,
                             max_retries=max_retries,
                             organization=organization,
+                            stream_options=stream_options,
                         )
                     else:
                         if not isinstance(max_retries, int):
@@ -557,16 +595,26 @@ class OpenAIChatCompletion(BaseLLM):
                         original_response=stringified_response,
                         additional_args={"complete_input_dict": data},
                     )
-                    return convert_to_model_response_object(
+
+                    final_response_obj = convert_to_model_response_object(
                         response_object=stringified_response,
                         model_response_object=model_response,
                         _response_headers=headers,
                     )
+                    if fake_stream is True:
+                        return self.mock_streaming(
+                            response=cast(ModelResponse, final_response_obj),
+                            logging_obj=logging_obj,
+                            model=model,
+                            stream_options=stream_options,
+                        )
+
+                    return final_response_obj
                 except openai.UnprocessableEntityError as e:
                     ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
                     if litellm.drop_params is True or drop_params is True:
-                        optional_params = drop_params_from_unprocessable_entity_error(
-                            e, optional_params
+                        inference_params = drop_params_from_unprocessable_entity_error(
+                            e, inference_params
                         )
                     else:
                         raise e
@@ -623,6 +671,7 @@ class OpenAIChatCompletion(BaseLLM):
     async def acompletion(
         self,
         data: dict,
+        model: str,
         model_response: ModelResponse,
         logging_obj: LiteLLMLoggingObj,
         timeout: Union[float, httpx.Timeout],
@@ -633,6 +682,8 @@ class OpenAIChatCompletion(BaseLLM):
         max_retries=None,
         headers=None,
         drop_params: Optional[bool] = None,
+        stream_options: Optional[dict] = None,
+        fake_stream: bool = False,
     ):
         response = None
         for _ in range(
@@ -667,6 +718,7 @@ class OpenAIChatCompletion(BaseLLM):
                     openai_aclient=openai_aclient, data=data, timeout=timeout
                 )
                 stringified_response = response.model_dump()
+
                 logging_obj.post_call(
                     input=data["messages"],
                     api_key=api_key,
@@ -674,12 +726,22 @@ class OpenAIChatCompletion(BaseLLM):
                     additional_args={"complete_input_dict": data},
                 )
                 logging_obj.model_call_details["response_headers"] = headers
-                return convert_to_model_response_object(
+                final_response_obj = convert_to_model_response_object(
                     response_object=stringified_response,
                     model_response_object=model_response,
                     hidden_params={"headers": headers},
                     _response_headers=headers,
                 )
+
+                if fake_stream is True:
+                    return self.mock_streaming(
+                        response=cast(ModelResponse, final_response_obj),
+                        logging_obj=logging_obj,
+                        model=model,
+                        stream_options=stream_options,
+                    )
+
+                return final_response_obj
             except openai.UnprocessableEntityError as e:
                 ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
                 if litellm.drop_params is True or drop_params is True:
@@ -710,7 +772,11 @@ class OpenAIChatCompletion(BaseLLM):
         client=None,
         max_retries=None,
         headers=None,
+        stream_options: Optional[dict] = None,
     ):
+        data["stream"] = True
+        if stream_options is not None:
+            data["stream_options"] = stream_options
         openai_client: OpenAI = self._get_openai_client(  # type: ignore
             is_async=False,
             api_key=api_key,
@@ -761,8 +827,12 @@ class OpenAIChatCompletion(BaseLLM):
         max_retries=None,
         headers=None,
         drop_params: Optional[bool] = None,
+        stream_options: Optional[dict] = None,
     ):
         response = None
+        data["stream"] = True
+        if stream_options is not None:
+            data["stream_options"] = stream_options
         for _ in range(2):
             try:
                 openai_aclient: AsyncOpenAI = self._get_openai_client(  # type: ignore
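For reference, a sketch of the lookup the handler performs before deciding whether to fake the stream; the import locations below are assumptions based on how these symbols are used in this diff:

```python
from litellm.types.utils import LlmProviders  # assumed import path
from litellm.utils import ProviderConfigManager  # assumed import path

provider_config = ProviderConfigManager.get_provider_chat_config(
    model="o1", provider=LlmProviders.OPENAI
)
# True for the base "o1" model in this commit, False for o1-mini / o1-preview
print(provider_config.should_fake_stream(model="o1", custom_llm_provider="openai"))
```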
@@ -284,7 +284,9 @@ class OpenAILikeChatHandler(OpenAILikeBase):
         if isinstance(provider_config, OpenAIGPTConfig) or isinstance(
             provider_config, OpenAIConfig
         ):
-            messages = provider_config._transform_messages(messages)
+            messages = provider_config._transform_messages(
+                messages=messages, model=model
+            )
 
         data = {
             "model": model,
@@ -139,11 +139,6 @@ class PredibaseConfig(BaseConfig):
             "Predibase transformation currently done in handler.py. Need to migrate to this file."
         )
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        return messages
-
     def transform_request(
         self,
         model: str,
@@ -130,11 +130,6 @@ class ReplicateConfig(BaseConfig):
             return split_model[1]
         return model
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:
@@ -57,12 +57,6 @@ class SagemakerConfig(BaseConfig):
     def get_config(cls):
         return super().get_config()
 
-    def _transform_messages(
-        self,
-        messages: List[AllMessageValues],
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, Headers]
     ) -> BaseLLMException:
@@ -240,12 +240,6 @@ class IBMWatsonXAIConfig(BaseConfig):
             "us-south",
         ]
 
-    def _transform_messages(
-        self,
-        messages: List[AllMessageValues],
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[Dict, httpx.Headers]
     ) -> BaseLLMException:
The remaining hunks update model_prices_and_context_window.json. Most apply the same change, adding "supports_system_messages": true to the OpenAI chat entries visible in the diff context ("gpt-4o" and its dated, audio, realtime and mini variants, "o1", "chatgpt-4o-latest", "gpt-4-turbo" and "gpt-4-turbo-preview", "gpt-4-0314", "gpt-4-0613", the "gpt-4-32k" variants, "gpt-4-1106-preview" and "gpt-4-0125-preview"), for example:

@@ -13,7 +13,8 @@
     "supports_audio_input": true,
     "supports_audio_output": true,
     "supports_prompt_caching": true,
-    "supports_response_schema": true
+    "supports_response_schema": true,
+    "supports_system_messages": true
   },
   "sambanova/Meta-Llama-3.1-8B-Instruct": {
     "max_tokens": 16000,

@@ -94,7 +95,8 @@
     "litellm_provider": "openai",
     "mode": "chat",
     "supports_function_calling": true,
-    "supports_prompt_caching": true
+    "supports_prompt_caching": true,
+    "supports_system_messages": true
   },
   "gpt-4o": {
     "max_tokens": 16384,

The "o1" entry (and the matching entry just before "chatgpt-4o-latest") additionally gains "supports_response_schema": true:

@@ -198,7 +206,9 @@
     "supports_function_calling": true,
     "supports_parallel_function_calling": true,
     "supports_vision": false,
-    "supports_prompt_caching": true
+    "supports_prompt_caching": true,
+    "supports_system_messages": true,
+    "supports_response_schema": true
   },
   "o1-mini": {
     "max_tokens": 65536,

"o1-mini" and the three following o1-mini/o1-preview entries (hunks at file lines 209, 223, 237 and 251) drop the function-calling flags:

@@ -209,8 +219,6 @@
     "cache_read_input_token_cost": 0.0000015,
     "litellm_provider": "openai",
     "mode": "chat",
-    "supports_function_calling": true,
-    "supports_parallel_function_calling": true,
     "supports_vision": false,
     "supports_prompt_caching": true
   },

The "supports_system_messages" addition repeats in the hunks at file lines 109, 124, 139, 154, 169, 184, 281, 294, 309, 324, 341, 357, 373, 390, 407, 419, 429, 440, 450, 460, 470, 483, 496 and 508, ending with:

@@ -520,7 +542,8 @@
     "mode": "chat",
     "supports_function_calling": true,
     "supports_parallel_function_calling": true,
|
"supports_parallel_function_calling": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-4-vision-preview": {
|
"gpt-4-vision-preview": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -531,7 +554,8 @@
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_vision": true,
|
"supports_vision": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-4-1106-vision-preview": {
|
"gpt-4-1106-vision-preview": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -542,7 +566,8 @@
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_vision": true,
|
"supports_vision": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo": {
|
"gpt-3.5-turbo": {
|
||||||
"max_tokens": 4097,
|
"max_tokens": 4097,
|
||||||
|
@ -553,7 +578,8 @@
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-0301": {
|
"gpt-3.5-turbo-0301": {
|
||||||
"max_tokens": 4097,
|
"max_tokens": 4097,
|
||||||
|
@ -563,7 +589,8 @@
|
||||||
"output_cost_per_token": 0.000002,
|
"output_cost_per_token": 0.000002,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-0613": {
|
"gpt-3.5-turbo-0613": {
|
||||||
"max_tokens": 4097,
|
"max_tokens": 4097,
|
||||||
|
@ -574,7 +601,8 @@
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-1106": {
|
"gpt-3.5-turbo-1106": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -586,7 +614,8 @@
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"supports_parallel_function_calling": true,
|
"supports_parallel_function_calling": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-0125": {
|
"gpt-3.5-turbo-0125": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -598,7 +627,8 @@
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"supports_parallel_function_calling": true,
|
"supports_parallel_function_calling": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-16k": {
|
"gpt-3.5-turbo-16k": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -608,7 +638,8 @@
|
||||||
"output_cost_per_token": 0.000004,
|
"output_cost_per_token": 0.000004,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-16k-0613": {
|
"gpt-3.5-turbo-16k-0613": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -618,7 +649,8 @@
|
||||||
"output_cost_per_token": 0.000004,
|
"output_cost_per_token": 0.000004,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-3.5-turbo": {
|
"ft:gpt-3.5-turbo": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -627,7 +659,8 @@
|
||||||
"input_cost_per_token": 0.000003,
|
"input_cost_per_token": 0.000003,
|
||||||
"output_cost_per_token": 0.000006,
|
"output_cost_per_token": 0.000006,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-3.5-turbo-0125": {
|
"ft:gpt-3.5-turbo-0125": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -636,7 +669,8 @@
|
||||||
"input_cost_per_token": 0.000003,
|
"input_cost_per_token": 0.000003,
|
||||||
"output_cost_per_token": 0.000006,
|
"output_cost_per_token": 0.000006,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-3.5-turbo-1106": {
|
"ft:gpt-3.5-turbo-1106": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -645,7 +679,8 @@
|
||||||
"input_cost_per_token": 0.000003,
|
"input_cost_per_token": 0.000003,
|
||||||
"output_cost_per_token": 0.000006,
|
"output_cost_per_token": 0.000006,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-3.5-turbo-0613": {
|
"ft:gpt-3.5-turbo-0613": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -654,7 +689,8 @@
|
||||||
"input_cost_per_token": 0.000003,
|
"input_cost_per_token": 0.000003,
|
||||||
"output_cost_per_token": 0.000006,
|
"output_cost_per_token": 0.000006,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-4-0613": {
|
"ft:gpt-4-0613": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -665,7 +701,8 @@
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing"
|
"source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing",
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-4o-2024-08-06": {
|
"ft:gpt-4o-2024-08-06": {
|
||||||
"max_tokens": 16384,
|
"max_tokens": 16384,
|
||||||
|
@ -678,7 +715,8 @@
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"supports_parallel_function_calling": true,
|
"supports_parallel_function_calling": true,
|
||||||
"supports_response_schema": true,
|
"supports_response_schema": true,
|
||||||
"supports_vision": true
|
"supports_vision": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-4o-2024-11-20": {
|
"ft:gpt-4o-2024-11-20": {
|
||||||
"max_tokens": 16384,
|
"max_tokens": 16384,
|
||||||
|
@ -693,7 +731,8 @@
|
||||||
"supports_parallel_function_calling": true,
|
"supports_parallel_function_calling": true,
|
||||||
"supports_response_schema": true,
|
"supports_response_schema": true,
|
||||||
"supports_vision": true,
|
"supports_vision": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-4o-mini-2024-07-18": {
|
"ft:gpt-4o-mini-2024-07-18": {
|
||||||
"max_tokens": 16384,
|
"max_tokens": 16384,
|
||||||
|
@ -708,7 +747,8 @@
|
||||||
"supports_parallel_function_calling": true,
|
"supports_parallel_function_calling": true,
|
||||||
"supports_response_schema": true,
|
"supports_response_schema": true,
|
||||||
"supports_vision": true,
|
"supports_vision": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:davinci-002": {
|
"ft:davinci-002": {
|
||||||
"max_tokens": 16384,
|
"max_tokens": 16384,
|
||||||
|
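The hunks above only append a trailing comma and a `"supports_system_messages": true` flag to each OpenAI entry. A rough way to confirm the flag is picked up once this map ships; this is a sketch, not part of the commit, and the helper signatures are taken from this diff rather than any documented API:

```python
import litellm
from litellm.utils import supports_system_messages

# Both lookups read the "supports_system_messages" flags added in the hunks above.
print(supports_system_messages("gpt-4-turbo", custom_llm_provider="openai"))
print(litellm.get_model_info("gpt-3.5-turbo").get("supports_system_messages"))
```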
@@ -3166,6 +3206,42 @@
         "supports_function_calling": true,
         "supports_vision": true,
         "supports_response_schema": true,
+        "supports_audio_output": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
+    },
+    "gemini/gemini-2.0-flash-exp": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0,
+        "input_cost_per_video_per_second": 0,
+        "input_cost_per_audio_per_second": 0,
+        "input_cost_per_token": 0,
+        "input_cost_per_character": 0,
+        "input_cost_per_token_above_128k_tokens": 0,
+        "input_cost_per_character_above_128k_tokens": 0,
+        "input_cost_per_image_above_128k_tokens": 0,
+        "input_cost_per_video_per_second_above_128k_tokens": 0,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0,
+        "output_cost_per_token": 0,
+        "output_cost_per_character": 0,
+        "output_cost_per_token_above_128k_tokens": 0,
+        "output_cost_per_character_above_128k_tokens": 0,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": true,
+        "tpm": 4000000,
+        "rpm": 10,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
     },
     "vertex_ai/claude-3-sonnet": {
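The new `gemini/gemini-2.0-flash-exp` entry above is free-tier (all costs set to 0) with commercial rate limits of 4,000,000 TPM and 10 RPM. A hedged sketch of reading the entry back through the model map, assuming the updated map is the one loaded by your litellm build:

```python
import litellm

info = litellm.get_model_info("gemini/gemini-2.0-flash-exp")
# Values come straight from the map entry added above.
print(info["max_input_tokens"])           # 1048576
print(info.get("tpm"), info.get("rpm"))   # 4000000, 10
print(info.get("supports_audio_output"))  # True
```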
@@ -74,11 +74,7 @@ class ProviderField(TypedDict):
     field_value: str


-class ModelInfo(TypedDict, total=False):
-    """
-    Model info for a given model, this is information found in litellm.model_prices_and_context_window.json
-    """
-
+class ModelInfoBase(TypedDict, total=False):
     key: Required[str]  # the key in litellm.model_cost which is returned

     max_tokens: Required[Optional[int]]
@@ -119,7 +115,6 @@ class ModelInfo(TypedDict, total=False):
         "completion", "embedding", "image_generation", "chat", "audio_transcription"
         ]
     ]
-    supported_openai_params: Required[Optional[List[str]]]
     supports_system_messages: Optional[bool]
     supports_response_schema: Optional[bool]
     supports_vision: Optional[bool]
@@ -133,6 +128,14 @@ class ModelInfo(TypedDict, total=False):
     rpm: Optional[int]


+class ModelInfo(ModelInfoBase, total=False):
+    """
+    Model info for a given model, this is information found in litellm.model_prices_and_context_window.json
+    """
+
+    supported_openai_params: Required[Optional[List[str]]]
+
+
 class GenericStreamingChunk(TypedDict, total=False):
     text: Required[str]
     tool_use: Optional[ChatCompletionToolCallChunk]
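The split above keeps every statically-mapped field on `ModelInfoBase` and leaves only `supported_openai_params` on `ModelInfo`, because computing that field calls back into the model map. A trimmed-down sketch of the resulting shape (field list abridged, not the full type):

```python
from typing import List, Optional
from typing_extensions import Required, TypedDict


class ModelInfoBase(TypedDict, total=False):
    # Fields readable directly from model_prices_and_context_window.json.
    key: Required[str]
    max_tokens: Required[Optional[int]]


class ModelInfo(ModelInfoBase, total=False):
    # The one derived field; computing it consults the model map again,
    # which is why it lives on the subclass rather than the base type.
    supported_openai_params: Required[Optional[List[str]]]
```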
545 litellm/utils.py
@@ -132,6 +132,7 @@ from litellm.types.utils import (
     LlmProviders,
     Message,
     ModelInfo,
+    ModelInfoBase,
     ModelResponse,
     ModelResponseStream,
     ProviderField,
@@ -1645,17 +1646,11 @@ def supports_system_messages(model: str, custom_llm_provider: Optional[str]) ->
     Raises:
         Exception: If the given model is not found in model_prices_and_context_window.json.
     """
-    try:
-        model_info = litellm.get_model_info(
-            model=model, custom_llm_provider=custom_llm_provider
-        )
-        if model_info.get("supports_system_messages", False) is True:
-            return True
-        return False
-    except Exception:
-        raise Exception(
-            f"Model not supports system messages. You passed model={model}, custom_llm_provider={custom_llm_provider}."
-        )
+    return _supports_factory(
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+        key="supports_system_messages",
+    )


 def supports_response_schema(model: str, custom_llm_provider: Optional[str]) -> bool:
@@ -1684,25 +1679,11 @@ def supports_response_schema(model: str, custom_llm_provider: Optional[str]) ->

     if custom_llm_provider in PROVIDERS_GLOBALLY_SUPPORT_RESPONSE_SCHEMA:
         return True
-    try:
-        ## GET MODEL INFO
-        model_info = litellm.get_model_info(
-            model=model, custom_llm_provider=custom_llm_provider
-        )
-
-        if model_info.get("supports_response_schema", False) is True:
-            return True
-    except Exception:
-        ## check if provider supports response schema globally
-        supported_params = get_supported_openai_params(
-            model=model,
-            custom_llm_provider=custom_llm_provider,
-            request_type="chat_completion",
-        )
-        if supported_params is not None and "response_schema" in supported_params:
-            return True
-
-    return False
+    return _supports_factory(
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+        key="supports_response_schema",
+    )


 def supports_function_calling(
@@ -1721,23 +1702,11 @@ def supports_function_calling(
     Raises:
         Exception: If the given model is not found or there's an error in retrieval.
     """
-    try:
-        model, custom_llm_provider, _, _ = litellm.get_llm_provider(
-            model=model, custom_llm_provider=custom_llm_provider
-        )
-
-        ## CHECK IF MODEL SUPPORTS FUNCTION CALLING ##
-        model_info = litellm.get_model_info(
-            model=model, custom_llm_provider=custom_llm_provider
-        )
-
-        if model_info.get("supports_function_calling", False) is True:
-            return True
-        return False
-    except Exception as e:
-        raise Exception(
-            f"Model not found or error in checking function calling support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}"
-        )
+    return _supports_factory(
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+        key="supports_function_calling",
+    )


 def _supports_factory(model: str, custom_llm_provider: Optional[str], key: str) -> bool:
@@ -1759,7 +1728,7 @@ def _supports_factory(model: str, custom_llm_provider: Optional[str], key: str)
         model=model, custom_llm_provider=custom_llm_provider
     )

-    model_info = litellm.get_model_info(
+    model_info = _get_model_info_helper(
         model=model, custom_llm_provider=custom_llm_provider
     )

@@ -1767,9 +1736,10 @@ def _supports_factory(model: str, custom_llm_provider: Optional[str], key: str)
             return True
         return False
     except Exception as e:
-        raise Exception(
+        verbose_logger.debug(
             f"Model not found or error in checking {key} support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}"
         )
+        return False


 def supports_audio_input(model: str, custom_llm_provider: Optional[str] = None) -> bool:
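With `_supports_factory` now logging and returning `False` instead of raising, the three `supports_*` helpers above become plain capability lookups. A usage sketch of the expected behavior; this is inferred from the diff, not a test that exists in the repo:

```python
from litellm.utils import supports_function_calling

# Mapped model: answered from the model map.
print(supports_function_calling("gpt-4-turbo", custom_llm_provider="openai"))  # True
# Unmapped model: previously raised; now expected to log a debug line and return False.
print(supports_function_calling("my-unmapped-model", custom_llm_provider="openai"))  # False
```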
@@ -4196,9 +4166,239 @@ def _get_potential_model_names(
     )


-def get_model_info(  # noqa: PLR0915
+def _get_max_position_embeddings(model_name: str) -> Optional[int]:
+    # Construct the URL for the config.json file
+    config_url = f"https://huggingface.co/{model_name}/raw/main/config.json"
+
+    try:
+        # Make the HTTP request to get the raw JSON file
+        response = litellm.module_level_client.get(config_url)
+        response.raise_for_status()  # Raise an exception for bad responses (4xx or 5xx)
+
+        # Parse the JSON response
+        config_json = response.json()
+
+        # Extract and return the max_position_embeddings
+        max_position_embeddings = config_json.get("max_position_embeddings")
+
+        if max_position_embeddings is not None:
+            return max_position_embeddings
+        else:
+            return None
+    except Exception:
+        return None
+
+
+def _get_model_info_helper(  # noqa: PLR0915
     model: str, custom_llm_provider: Optional[str] = None
-) -> ModelInfo:
+) -> ModelInfoBase:
+    """
+    Helper for 'get_model_info'. Separated out to avoid infinite loop caused by returning 'supported_openai_param's
+    """
+    try:
+        azure_llms = {**litellm.azure_llms, **litellm.azure_embedding_models}
+        if model in azure_llms:
+            model = azure_llms[model]
+        if custom_llm_provider is not None and custom_llm_provider == "vertex_ai_beta":
+            custom_llm_provider = "vertex_ai"
+        if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
+            if "meta/" + model in litellm.vertex_llama3_models:
+                model = "meta/" + model
+            elif model + "@latest" in litellm.vertex_mistral_models:
+                model = model + "@latest"
+            elif model + "@latest" in litellm.vertex_ai_ai21_models:
+                model = model + "@latest"
+        ##########################
+        potential_model_names = _get_potential_model_names(
+            model=model, custom_llm_provider=custom_llm_provider
+        )
+        combined_model_name = potential_model_names["combined_model_name"]
+        stripped_model_name = potential_model_names["stripped_model_name"]
+        combined_stripped_model_name = potential_model_names["combined_stripped_model_name"]
+        split_model = potential_model_names["split_model"]
+        custom_llm_provider = potential_model_names["custom_llm_provider"]
+        #########################
+        if custom_llm_provider == "huggingface":
+            max_tokens = _get_max_position_embeddings(model_name=model)
+            return ModelInfoBase(
+                key=model,
+                max_tokens=max_tokens,  # type: ignore
+                max_input_tokens=None,
+                max_output_tokens=None,
+                input_cost_per_token=0,
+                output_cost_per_token=0,
+                litellm_provider="huggingface",
+                mode="chat",
+                supports_system_messages=None,
+                supports_response_schema=None,
+                supports_function_calling=None,
+                supports_assistant_prefill=None,
+                supports_prompt_caching=None,
+                supports_pdf_input=None,
+            )
+        elif custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
+            return litellm.OllamaConfig().get_model_info(model)
+        else:
+            """
+            Check if: (in order of specificity)
+            1. 'custom_llm_provider/model' in litellm.model_cost. Checks "groq/llama3-8b-8192" if model="llama3-8b-8192" and custom_llm_provider="groq"
+            2. 'model' in litellm.model_cost. Checks "gemini-1.5-pro-002" in litellm.model_cost if model="gemini-1.5-pro-002" and custom_llm_provider=None
+            3. 'combined_stripped_model_name' in litellm.model_cost. Checks if 'gemini/gemini-1.5-flash' in model map, if 'gemini/gemini-1.5-flash-001' given.
+            4. 'stripped_model_name' in litellm.model_cost. Checks if 'ft:gpt-3.5-turbo' in model map, if 'ft:gpt-3.5-turbo:my-org:custom_suffix:id' given.
+            5. 'split_model' in litellm.model_cost. Checks "llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192"
+            """
+
+            _model_info: Optional[Dict[str, Any]] = None
+            key: Optional[str] = None
+            if combined_model_name in litellm.model_cost:
+                key = combined_model_name
+                _model_info = _get_model_info_from_model_cost(key=key)
+                if not _check_provider_match(model_info=_model_info, custom_llm_provider=custom_llm_provider):
+                    _model_info = None
+            if _model_info is None and model in litellm.model_cost:
+                key = model
+                _model_info = _get_model_info_from_model_cost(key=key)
+                if not _check_provider_match(model_info=_model_info, custom_llm_provider=custom_llm_provider):
+                    _model_info = None
+            if _model_info is None and combined_stripped_model_name in litellm.model_cost:
+                key = combined_stripped_model_name
+                _model_info = _get_model_info_from_model_cost(key=key)
+                if not _check_provider_match(model_info=_model_info, custom_llm_provider=custom_llm_provider):
+                    _model_info = None
+            if _model_info is None and stripped_model_name in litellm.model_cost:
+                key = stripped_model_name
+                _model_info = _get_model_info_from_model_cost(key=key)
+                if not _check_provider_match(model_info=_model_info, custom_llm_provider=custom_llm_provider):
+                    _model_info = None
+            if _model_info is None and split_model in litellm.model_cost:
+                key = split_model
+                _model_info = _get_model_info_from_model_cost(key=key)
+                if not _check_provider_match(model_info=_model_info, custom_llm_provider=custom_llm_provider):
+                    _model_info = None
+            if _model_info is None or key is None:
+                raise ValueError(
+                    "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+                )
+
+            ## PROVIDER-SPECIFIC INFORMATION
+            if custom_llm_provider == "predibase":
+                _model_info["supports_response_schema"] = True
+
+            _input_cost_per_token: Optional[float] = _model_info.get("input_cost_per_token")
+            if _input_cost_per_token is None:
+                # default value to 0, be noisy about this
+                verbose_logger.debug(
+                    "model={}, custom_llm_provider={} has no input_cost_per_token in model_cost_map. Defaulting to 0.".format(model, custom_llm_provider)
+                )
+                _input_cost_per_token = 0
+
+            _output_cost_per_token: Optional[float] = _model_info.get("output_cost_per_token")
+            if _output_cost_per_token is None:
+                # default value to 0, be noisy about this
+                verbose_logger.debug(
+                    "model={}, custom_llm_provider={} has no output_cost_per_token in model_cost_map. Defaulting to 0.".format(model, custom_llm_provider)
+                )
+                _output_cost_per_token = 0
+
+            return ModelInfoBase(
+                key=key,
+                max_tokens=_model_info.get("max_tokens", None),
+                max_input_tokens=_model_info.get("max_input_tokens", None),
+                max_output_tokens=_model_info.get("max_output_tokens", None),
+                input_cost_per_token=_input_cost_per_token,
+                cache_creation_input_token_cost=_model_info.get("cache_creation_input_token_cost", None),
+                cache_read_input_token_cost=_model_info.get("cache_read_input_token_cost", None),
+                input_cost_per_character=_model_info.get("input_cost_per_character", None),
+                input_cost_per_token_above_128k_tokens=_model_info.get("input_cost_per_token_above_128k_tokens", None),
+                input_cost_per_query=_model_info.get("input_cost_per_query", None),
+                input_cost_per_second=_model_info.get("input_cost_per_second", None),
+                input_cost_per_audio_token=_model_info.get("input_cost_per_audio_token", None),
+                output_cost_per_token=_output_cost_per_token,
+                output_cost_per_audio_token=_model_info.get("output_cost_per_audio_token", None),
+                output_cost_per_character=_model_info.get("output_cost_per_character", None),
+                output_cost_per_token_above_128k_tokens=_model_info.get("output_cost_per_token_above_128k_tokens", None),
+                output_cost_per_character_above_128k_tokens=_model_info.get("output_cost_per_character_above_128k_tokens", None),
+                output_cost_per_second=_model_info.get("output_cost_per_second", None),
+                output_cost_per_image=_model_info.get("output_cost_per_image", None),
+                output_vector_size=_model_info.get("output_vector_size", None),
+                litellm_provider=_model_info.get("litellm_provider", custom_llm_provider),
+                mode=_model_info.get("mode"),  # type: ignore
+                supports_system_messages=_model_info.get("supports_system_messages", None),
+                supports_response_schema=_model_info.get("supports_response_schema", None),
+                supports_vision=_model_info.get("supports_vision", False),
+                supports_function_calling=_model_info.get("supports_function_calling", False),
+                supports_assistant_prefill=_model_info.get("supports_assistant_prefill", False),
+                supports_prompt_caching=_model_info.get("supports_prompt_caching", False),
+                supports_audio_input=_model_info.get("supports_audio_input", False),
+                supports_audio_output=_model_info.get("supports_audio_output", False),
+                supports_pdf_input=_model_info.get("supports_pdf_input", False),
+                tpm=_model_info.get("tpm", None),
+                rpm=_model_info.get("rpm", None),
+            )
+    except Exception as e:
+        if "OllamaError" in str(e):
+            raise e
+        raise Exception(
+            "This model isn't mapped yet. model={}, custom_llm_provider={}. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json.".format(
+                model, custom_llm_provider
+            )
+        )
+
+
+def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
     """
     Get a dict for the maximum tokens (context window), input_cost_per_token, output_cost_per_token for a given model.
@@ -4265,241 +4465,20 @@ def get_model_info(  # noqa: PLR0915
         "supported_openai_params": ["temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty"]
     }
     """
-    supported_openai_params: Union[List[str], None] = []
-
-    def _get_max_position_embeddings(model_name):
-        # Construct the URL for the config.json file
-        config_url = f"https://huggingface.co/{model_name}/raw/main/config.json"
-
-        try:
-            # Make the HTTP request to get the raw JSON file
-            response = litellm.module_level_client.get(config_url)
-            response.raise_for_status()  # Raise an exception for bad responses (4xx or 5xx)
-
-            # Parse the JSON response
-            config_json = response.json()
-
-            # Extract and return the max_position_embeddings
-            max_position_embeddings = config_json.get("max_position_embeddings")
-
-            if max_position_embeddings is not None:
-                return max_position_embeddings
-            else:
-                return None
-        except Exception:
-            return None
-
-    try:
-        azure_llms = {**litellm.azure_llms, **litellm.azure_embedding_models}
-        if model in azure_llms:
-            model = azure_llms[model]
-        if custom_llm_provider is not None and custom_llm_provider == "vertex_ai_beta":
-            custom_llm_provider = "vertex_ai"
-        if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
-            if "meta/" + model in litellm.vertex_llama3_models:
-                model = "meta/" + model
-            elif model + "@latest" in litellm.vertex_mistral_models:
-                model = model + "@latest"
-            elif model + "@latest" in litellm.vertex_ai_ai21_models:
-                model = model + "@latest"
-        ##########################
-        potential_model_names = _get_potential_model_names(
-            model=model, custom_llm_provider=custom_llm_provider
-        )
-        supported_openai_params = litellm.get_supported_openai_params(
-            model=model, custom_llm_provider=custom_llm_provider
-        )
+    supported_openai_params = litellm.get_supported_openai_params(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    _model_info = _get_model_info_helper(
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+    )
+
+    returned_model_info = ModelInfo(
+        **_model_info, supported_openai_params=supported_openai_params
+    )
+
+    return returned_model_info


 def json_schema_type(python_type_name: str):
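After this refactor `get_model_info` is a thin wrapper: `_get_model_info_helper` returns the static `ModelInfoBase` entry, and the wrapper layers `supported_openai_params` on top, which avoids the recursion that motivated the split. A hedged sketch of the call flow from the caller's side, with the dictionary keys taken from this diff:

```python
import litellm

info = litellm.get_model_info(model="gpt-4o-mini", custom_llm_provider="openai")
# Static fields come from _get_model_info_helper(); the param list is attached afterwards.
print(info["litellm_provider"], info["mode"])
print((info.get("supported_openai_params") or [])[:5])
```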
@@ -13,7 +13,8 @@
         "supports_audio_input": true,
         "supports_audio_output": true,
         "supports_prompt_caching": true,
-        "supports_response_schema": true
+        "supports_response_schema": true,
+        "supports_system_messages": true
     },
     "sambanova/Meta-Llama-3.1-8B-Instruct": {
         "max_tokens": 16000,
@@ -94,7 +95,8 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o": {
@@ -109,7 +111,8 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-audio-preview": {
@@ -124,7 +127,8 @@
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-audio-preview-2024-10-01": {
@@ -139,7 +143,8 @@
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-mini-audio-preview-2024-12-17": {
@@ -154,7 +159,8 @@
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-mini": {
@@ -169,7 +175,8 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-mini-2024-07-18": {
@@ -184,7 +191,8 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "o1": {
@@ -198,7 +206,9 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true,
+        "supports_response_schema": true
     },
     "o1-mini": {
@@ -209,8 +219,6 @@
         "cache_read_input_token_cost": 0.0000015,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -223,8 +231,6 @@
         "cache_read_input_token_cost": 0.0000015,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -237,8 +243,6 @@
         "cache_read_input_token_cost": 0.0000075,
         "litellm_provider": "openai",
        "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -251,8 +255,6 @@
         "cache_read_input_token_cost": 0.0000075,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -268,7 +270,9 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true,
+        "supports_response_schema": true
     },
     "chatgpt-4o-latest": {
         "max_tokens": 4096,
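Per the hunks above, the `o1` and o1-preview entries now advertise system-message and response-schema support, while the o1-mini / o1-preview entries drop their function-calling flags. A sketch of how those flags surface through the capability helpers; this is the behavior implied by the map changes, not a verified test:

```python
from litellm.utils import supports_response_schema, supports_system_messages

print(supports_system_messages("o1", custom_llm_provider="openai"))  # True per the new entry
print(supports_response_schema("o1", custom_llm_provider="openai"))  # True per the new entry
```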
@@ -281,7 +285,8 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-2024-05-13": {
@@ -294,7 +299,8 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-2024-08-06": {
@@ -309,7 +315,8 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-2024-11-20": {
@@ -324,7 +331,8 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-realtime-preview-2024-10-01": {
@@ -341,7 +349,8 @@
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-realtime-preview": {
@@ -357,7 +366,8 @@
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-realtime-preview-2024-12-17": {
@@ -373,7 +383,8 @@
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-mini-realtime-preview": {
@@ -390,7 +401,8 @@
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-mini-realtime-preview-2024-12-17": {
@@ -407,7 +419,8 @@
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4-turbo-preview": {
@@ -419,7 +432,8 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-0314": {
@@ -429,7 +443,8 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-0613": {
@@ -440,7 +455,8 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-32k": {
@@ -450,7 +466,8 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-32k-0314": {
@@ -460,7 +477,8 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-32k-0613": {
@@ -470,7 +488,8 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-turbo": {
@@ -483,7 +502,8 @@
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-turbo-2024-04-09": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -496,7 +516,8 @@
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"supports_parallel_function_calling": true,
|
"supports_parallel_function_calling": true,
|
||||||
"supports_vision": true,
|
"supports_vision": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-4-1106-preview": {
|
"gpt-4-1106-preview": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -508,7 +529,8 @@
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"supports_parallel_function_calling": true,
|
"supports_parallel_function_calling": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-4-0125-preview": {
|
"gpt-4-0125-preview": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -520,7 +542,8 @@
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"supports_parallel_function_calling": true,
|
"supports_parallel_function_calling": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-4-vision-preview": {
|
"gpt-4-vision-preview": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -531,7 +554,8 @@
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_vision": true,
|
"supports_vision": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-4-1106-vision-preview": {
|
"gpt-4-1106-vision-preview": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -542,7 +566,8 @@
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_vision": true,
|
"supports_vision": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo": {
|
"gpt-3.5-turbo": {
|
||||||
"max_tokens": 4097,
|
"max_tokens": 4097,
|
||||||
|
@ -553,7 +578,8 @@
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-0301": {
|
"gpt-3.5-turbo-0301": {
|
||||||
"max_tokens": 4097,
|
"max_tokens": 4097,
|
||||||
|
@ -563,7 +589,8 @@
|
||||||
"output_cost_per_token": 0.000002,
|
"output_cost_per_token": 0.000002,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-0613": {
|
"gpt-3.5-turbo-0613": {
|
||||||
"max_tokens": 4097,
|
"max_tokens": 4097,
|
||||||
|
@ -574,7 +601,8 @@
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-1106": {
|
"gpt-3.5-turbo-1106": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -586,7 +614,8 @@
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"supports_parallel_function_calling": true,
|
"supports_parallel_function_calling": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-0125": {
|
"gpt-3.5-turbo-0125": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -598,7 +627,8 @@
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"supports_parallel_function_calling": true,
|
"supports_parallel_function_calling": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-16k": {
|
"gpt-3.5-turbo-16k": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -608,7 +638,8 @@
|
||||||
"output_cost_per_token": 0.000004,
|
"output_cost_per_token": 0.000004,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-16k-0613": {
|
"gpt-3.5-turbo-16k-0613": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -618,7 +649,8 @@
|
||||||
"output_cost_per_token": 0.000004,
|
"output_cost_per_token": 0.000004,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-3.5-turbo": {
|
"ft:gpt-3.5-turbo": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -627,7 +659,8 @@
|
||||||
"input_cost_per_token": 0.000003,
|
"input_cost_per_token": 0.000003,
|
||||||
"output_cost_per_token": 0.000006,
|
"output_cost_per_token": 0.000006,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-3.5-turbo-0125": {
|
"ft:gpt-3.5-turbo-0125": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -636,7 +669,8 @@
|
||||||
"input_cost_per_token": 0.000003,
|
"input_cost_per_token": 0.000003,
|
||||||
"output_cost_per_token": 0.000006,
|
"output_cost_per_token": 0.000006,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-3.5-turbo-1106": {
|
"ft:gpt-3.5-turbo-1106": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -645,7 +679,8 @@
|
||||||
"input_cost_per_token": 0.000003,
|
"input_cost_per_token": 0.000003,
|
||||||
"output_cost_per_token": 0.000006,
|
"output_cost_per_token": 0.000006,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-3.5-turbo-0613": {
|
"ft:gpt-3.5-turbo-0613": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -654,7 +689,8 @@
|
||||||
"input_cost_per_token": 0.000003,
|
"input_cost_per_token": 0.000003,
|
||||||
"output_cost_per_token": 0.000006,
|
"output_cost_per_token": 0.000006,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-4-0613": {
|
"ft:gpt-4-0613": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
|
@ -665,7 +701,8 @@
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing"
|
"source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing",
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-4o-2024-08-06": {
|
"ft:gpt-4o-2024-08-06": {
|
||||||
"max_tokens": 16384,
|
"max_tokens": 16384,
|
||||||
|
@ -678,7 +715,8 @@
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"supports_parallel_function_calling": true,
|
"supports_parallel_function_calling": true,
|
||||||
"supports_response_schema": true,
|
"supports_response_schema": true,
|
||||||
"supports_vision": true
|
"supports_vision": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-4o-2024-11-20": {
|
"ft:gpt-4o-2024-11-20": {
|
||||||
"max_tokens": 16384,
|
"max_tokens": 16384,
|
||||||
|
@ -693,7 +731,8 @@
|
||||||
"supports_parallel_function_calling": true,
|
"supports_parallel_function_calling": true,
|
||||||
"supports_response_schema": true,
|
"supports_response_schema": true,
|
||||||
"supports_vision": true,
|
"supports_vision": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:gpt-4o-mini-2024-07-18": {
|
"ft:gpt-4o-mini-2024-07-18": {
|
||||||
"max_tokens": 16384,
|
"max_tokens": 16384,
|
||||||
|
@ -708,7 +747,8 @@
|
||||||
"supports_parallel_function_calling": true,
|
"supports_parallel_function_calling": true,
|
||||||
"supports_response_schema": true,
|
"supports_response_schema": true,
|
||||||
"supports_vision": true,
|
"supports_vision": true,
|
||||||
"supports_prompt_caching": true
|
"supports_prompt_caching": true,
|
||||||
|
"supports_system_messages": true
|
||||||
},
|
},
|
||||||
"ft:davinci-002": {
|
"ft:davinci-002": {
|
||||||
"max_tokens": 16384,
|
"max_tokens": 16384,
|
||||||
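The `supports_system_messages` flags added above are readable at runtime through `litellm.utils.supports_system_messages`, the same helper the updated o1 test later in this commit imports. A minimal sketch of using it to decide where a system prompt goes; the `build_messages` helper is illustrative only, and loading the local model cost map mirrors the test setup rather than being strictly required:

```python
import os

import litellm
from litellm.utils import supports_system_messages

# Mirrors the test setup: read the local copy of model_prices_and_context_window.json.
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")


def build_messages(model: str, system_prompt: str, user_prompt: str) -> list:
    """Send a real system message only if the model map says the model accepts one."""
    if supports_system_messages(model, "openai"):
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
    # Models without the flag in this map (e.g. o1-preview / o1-mini) get the
    # instructions folded into the user turn instead.
    return [{"role": "user", "content": f"{system_prompt}\n\n{user_prompt}"}]


print(build_messages("gpt-4o-2024-11-20", "Be a good bot!", "Hello!"))
print(build_messages("o1-mini", "Be a good bot!", "Hello!"))
```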
@@ -3166,6 +3206,42 @@
         "supports_function_calling": true,
         "supports_vision": true,
         "supports_response_schema": true,
+        "supports_audio_output": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
+    },
+    "gemini/gemini-2.0-flash-exp": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0,
+        "input_cost_per_video_per_second": 0,
+        "input_cost_per_audio_per_second": 0,
+        "input_cost_per_token": 0,
+        "input_cost_per_character": 0,
+        "input_cost_per_token_above_128k_tokens": 0,
+        "input_cost_per_character_above_128k_tokens": 0,
+        "input_cost_per_image_above_128k_tokens": 0,
+        "input_cost_per_video_per_second_above_128k_tokens": 0,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0,
+        "output_cost_per_token": 0,
+        "output_cost_per_character": 0,
+        "output_cost_per_token_above_128k_tokens": 0,
+        "output_cost_per_character_above_128k_tokens": 0,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": true,
+        "tpm": 4000000,
+        "rpm": 10,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
     },
     "vertex_ai/claude-3-sonnet": {
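The new `gemini/gemini-2.0-flash-exp` entry above is priced at zero and capped at 10 RPM / 4,000,000 TPM. A minimal sketch of calling it through `litellm.completion`, assuming a Google AI Studio key is exported as `GEMINI_API_KEY`; the prompt and the cost check are illustrative, not part of this commit:

```python
import os

import litellm

# Assumes a Google AI Studio key for the `gemini/` provider.
os.environ["GEMINI_API_KEY"] = "your-api-key"

resp = litellm.completion(
    model="gemini/gemini-2.0-flash-exp",
    messages=[
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "In one sentence, what is a model cost map?"},
    ],
)
print(resp.choices[0].message.content)

# With the all-zero pricing above, tracked spend for this call should come out to 0.0.
print(litellm.completion_cost(completion_response=resp))
```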
@@ -17,14 +17,19 @@ import litellm
 from litellm import Choices, Message, ModelResponse


+@pytest.mark.parametrize("model", ["o1-preview", "o1-mini", "o1"])
 @pytest.mark.asyncio
-async def test_o1_handle_system_role():
+async def test_o1_handle_system_role(model):
     """
     Tests that:
     - max_tokens is translated to 'max_completion_tokens'
     - role 'system' is translated to 'user'
     """
     from openai import AsyncOpenAI
+    from litellm.utils import supports_system_messages
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     litellm.set_verbose = True

@@ -35,9 +40,9 @@ async def test_o1_handle_system_role():
     ) as mock_client:
         try:
             await litellm.acompletion(
-                model="o1-preview",
+                model=model,
                 max_tokens=10,
-                messages=[{"role": "system", "content": "Hello!"}],
+                messages=[{"role": "system", "content": "Be a good bot!"}],
                 client=client,
             )
         except Exception as e:
@@ -48,9 +53,73 @@ async def test_o1_handle_system_role():

     print("request_body: ", request_body)

-    assert request_body["model"] == "o1-preview"
+    assert request_body["model"] == model
     assert request_body["max_completion_tokens"] == 10
-    assert request_body["messages"] == [{"role": "user", "content": "Hello!"}]
+    if supports_system_messages(model, "openai"):
+        assert request_body["messages"] == [
+            {"role": "system", "content": "Be a good bot!"}
+        ]
+    else:
+        assert request_body["messages"] == [
+            {"role": "user", "content": "Be a good bot!"}
+        ]
+
+
+@pytest.mark.parametrize(
+    "model, expected_tool_calling_support",
+    [("o1-preview", False), ("o1-mini", False), ("o1", True)],
+)
+@pytest.mark.asyncio
+async def test_o1_handle_tool_calling_optional_params(
+    model, expected_tool_calling_support
+):
+    """
+    Tests that:
+    - max_tokens is translated to 'max_completion_tokens'
+    - role 'system' is translated to 'user'
+    """
+    from openai import AsyncOpenAI
+    from litellm.utils import ProviderConfigManager
+    from litellm.types.utils import LlmProviders
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    config = ProviderConfigManager.get_provider_chat_config(
+        model=model, provider=LlmProviders.OPENAI
+    )
+
+    supported_params = config.get_supported_openai_params(model=model)
+
+    assert expected_tool_calling_support == ("tools" in supported_params)
+
+
+# @pytest.mark.parametrize(
+#     "model",
+#     ["o1"],  # "o1-preview", "o1-mini",
+# )
+# @pytest.mark.asyncio
+# async def test_o1_handle_streaming_e2e(model):
+#     """
+#     Tests that:
+#     - max_tokens is translated to 'max_completion_tokens'
+#     - role 'system' is translated to 'user'
+#     """
+#     from openai import AsyncOpenAI
+#     from litellm.utils import ProviderConfigManager
+#     from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
+#     from litellm.types.utils import LlmProviders
+
+#     resp = litellm.completion(
+#         model=model,
+#         messages=[{"role": "user", "content": "Hello!"}],
+#         stream=True,
+#     )
+#     assert isinstance(resp, CustomStreamWrapper)
+#     for chunk in resp:
+#         print("chunk: ", chunk)

+#     assert True
+
+
 @pytest.mark.asyncio
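The `ProviderConfigManager` lookup in the new test can also gate optional params in application code. A sketch under the same assumptions as the test (OpenAI provider, local model cost map); the `maybe_add_tools` helper is hypothetical, not a litellm API:

```python
import os

import litellm
from litellm.types.utils import LlmProviders
from litellm.utils import ProviderConfigManager

os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")


def maybe_add_tools(model: str, request_kwargs: dict, tools: list) -> dict:
    """Attach `tools` only when the provider config reports tool-calling support."""
    config = ProviderConfigManager.get_provider_chat_config(
        model=model, provider=LlmProviders.OPENAI
    )
    if "tools" in config.get_supported_openai_params(model=model):
        request_kwargs["tools"] = tools
    return request_kwargs


tools = [
    {
        "type": "function",
        "function": {
            "name": "get_time",
            "description": "Get the current time",
            "parameters": {"type": "object", "properties": {}},
        },
    }
]

# Per the parametrization above, "o1" keeps the tools; "o1-mini" / "o1-preview" drop them.
kwargs = maybe_add_tools(
    model="o1",
    request_kwargs={"model": "o1", "messages": [{"role": "user", "content": "What time is it?"}]},
    tools=tools,
)
# resp = litellm.completion(**kwargs)
```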
@@ -2072,6 +2072,7 @@ def test_openai_chat_completion_complete_response_call():
         "azure/chatgpt-v-2",
         "claude-3-haiku-20240307",
         "o1-preview",
+        "o1",
         "azure/fake-o1-mini",
     ],
 )