fix(health.md): add rerank model health check information (#7295)

* fix(health.md): add rerank model health check information

* build(model_prices_and_context_window.json): add gemini 2.0 for google ai studio - pricing + commercial rate limits

* build(model_prices_and_context_window.json): add gemini-2.0 supports audio output = true

* docs(team_model_add.md): clarify allowing teams to add models is an enterprise feature

* fix(o1_transformation.py): add support for 'n', 'response_format' and 'stop' params for o1 and 'stream_options' param for o1-mini

* build(model_prices_and_context_window.json): add 'supports_system_message' to supporting openai models

needed as the o1-preview and o1-mini models don't support 'system' messages

* fix(o1_transformation.py): translate system message based on if o1 model supports it

* fix(o1_transformation.py): return 'stream' param support if o1-mini/o1-preview

o1 currently doesn't support streaming, but the other model versions do

Fixes https://github.com/BerriAI/litellm/issues/7292

* fix(o1_transformation.py): return tool calling/response_format in supported params if model map says so

Fixes https://github.com/BerriAI/litellm/issues/7292
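(Illustrative note, not part of the commit: a quick sketch of the effect of the model-map based check, assuming the public helper `litellm.get_supported_openai_params` is available, as it is in recent litellm releases.)

```python
import litellm

# Supported params are now derived from the model map instead of a hard-coded
# deny-list, so "o1" (which the map marks as supporting function calling and
# response_format) exposes more params than "o1-mini" / "o1-preview".
for model in ["o1", "o1-mini", "o1-preview"]:
    params = litellm.get_supported_openai_params(
        model=model, custom_llm_provider="openai"
    ) or []
    print(model, {p for p in ("tools", "response_format", "stream") if p in params})
```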

* fix: fix linting errors

* fix: update '_transform_messages'

* fix(o1_transformation.py): fix provider passed for supported param checks

* test(base_llm_unit_tests.py): skip test if api takes >5s to respond

* fix(utils.py): return false in 'supports_factory' if can't find value

* fix(o1_transformation.py): always return stream + stream_options as supported params + handle stream options being passed in for azure o1

* feat(openai.py): support stream faking natively in openai handler

Allows streaming to be faked for just the "o1" model, while o1-mini and o1-preview stream natively

Fixes https://github.com/BerriAI/litellm/issues/7292

* fix(openai.py): use inference param instead of original optional param
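(Illustrative note, not part of the diff: a minimal sketch of the resulting behavior, assuming the standard `litellm.completion` interface and an `OPENAI_API_KEY` in the environment. `stream=True` works for the whole o1 family: the base "o1" model is served from a faked stream, while o1-mini and o1-preview stream natively.)

```python
import litellm

# Both calls return an iterator of chunks. For "o1" the chunks come from a mock
# stream built out of the full (non-streaming) response; for "o1-mini" they come
# from the provider's native SSE stream.
for model in ["o1", "o1-mini"]:
    response = litellm.completion(
        model=model,
        messages=[{"role": "user", "content": "Say hi"}],
        stream=True,
    )
    for chunk in response:
        print(chunk.choices[0].delta.content or "", end="")
    print()
```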
Krish Dholakia 2024-12-18 19:18:10 -08:00 committed by GitHub
parent e95820367f
commit 1a4910f6c0
34 changed files with 800 additions and 515 deletions


@@ -121,6 +121,20 @@ model_list:
       mode: audio_speech
 ```
 
+### Rerank Models
+
+To run rerank health checks, specify the mode as "rerank" in your config for the relevant model.
+
+```yaml
+model_list:
+  - model_name: rerank-english-v3.0
+    litellm_params:
+      model: cohere/rerank-english-v3.0
+      api_key: os.environ/COHERE_API_KEY
+    model_info:
+      mode: rerank
+```
+
 ### Batch Models (Azure Only)
 For Azure models deployed as 'batch' models, set `mode: batch`.
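(Illustrative note, not part of the diff: with the config above, the proxy's `/health` endpoint should exercise the rerank deployment as well. A minimal sketch, assuming the proxy runs on the default port 4000 with master key `sk-1234`.)

```python
import requests

# /health runs a health check against every model in the config, including
# deployments marked with mode: rerank, and reports healthy/unhealthy endpoints.
resp = requests.get(
    "http://0.0.0.0:4000/health",
    headers={"Authorization": "Bearer sk-1234"},
    timeout=60,
)
print(resp.json())
```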


@@ -1,4 +1,13 @@
-# Allow Teams to Add Models
+# ✨ Allow Teams to Add Models
+
+:::info
+
+This is an Enterprise feature.
+
+[Enterprise Pricing](https://www.litellm.ai/#pricing)
+
+[Contact us here to get a free trial](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
+:::
 
 Allow team to add their own models/key for that project - so any OpenAI call they make uses their OpenAI key.


@@ -3144,7 +3144,9 @@ def prompt_factory(
         else:
             return gemini_text_image_pt(messages=messages)
     elif custom_llm_provider == "mistral":
-        return litellm.MistralConfig()._transform_messages(messages=messages)
+        return litellm.MistralConfig()._transform_messages(
+            messages=messages, model=model
+        )
     elif custom_llm_provider == "bedrock":
         if "amazon.titan-text" in model:
             return amazon_titan_pt(messages=messages)


@@ -260,12 +260,6 @@ class AnthropicTextConfig(BaseConfig):
         return str(prompt)
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        "Not required"
-        raise NotImplementedError
-
     def get_model_response_iterator(
         self,
         streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],


@@ -57,6 +57,7 @@ class AzureOpenAIO1ChatCompletion(AzureChatCompletion):
         client=None,
     ):
         stream: Optional[bool] = optional_params.pop("stream", False)
+        stream_options: Optional[dict] = optional_params.pop("stream_options", None)
         response = super().completion(
             model,
             messages,
@@ -90,6 +91,7 @@ class AzureOpenAIO1ChatCompletion(AzureChatCompletion):
                 model=model,
                 custom_llm_provider="openai",
                 logging_obj=logging_obj,
+                stream_options=stream_options,
             )
 
             return streaming_response


@@ -2,11 +2,11 @@ from typing import List, Optional, Tuple
 
 import litellm
 from litellm._logging import verbose_logger
-from litellm.llms.openai.openai import OpenAIConfig
 from litellm.litellm_core_utils.prompt_templates.common_utils import (
     _audio_or_image_in_message_content,
     convert_content_list_to_str,
 )
+from litellm.llms.openai.openai import OpenAIConfig
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import ProviderField
@@ -33,6 +33,7 @@ class AzureAIStudioConfig(OpenAIConfig):
     def _transform_messages(
         self,
         messages: List[AllMessageValues],
+        model: str,
     ) -> List:
         """
         - Azure AI Studio doesn't support content as a list. This handles:


@@ -82,6 +82,14 @@ class BaseConfig(ABC):
             and v is not None
         }
 
+    def should_fake_stream(
+        self, model: str, custom_llm_provider: Optional[str] = None
+    ) -> bool:
+        """
+        Returns True if the model/provider should fake stream
+        """
+        return False
+
     @abstractmethod
     def get_supported_openai_params(self, model: str) -> list:
         pass


@@ -131,11 +131,6 @@ class ClarifaiConfig(BaseConfig):
             headers["Authorization"] = f"Bearer {api_key}"
         return headers
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        raise NotImplementedError
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:


@@ -158,11 +158,6 @@ class CloudflareChatConfig(BaseConfig):
             message=error_message,
         )
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        raise NotImplementedError
-
     def get_model_response_iterator(
         self,
         streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],


@@ -365,8 +365,3 @@ class CohereChatConfig(BaseConfig):
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:
         return CohereError(status_code=status_code, message=error_message)
-
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        raise NotImplementedError


@@ -121,12 +121,6 @@ class CohereTextConfig(BaseConfig):
             api_key=api_key,
         )
 
-    def _transform_messages(
-        self,
-        messages: List[AllMessageValues],
-    ) -> List[AllMessageValues]:
-        raise NotImplementedError
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:


@@ -2,11 +2,12 @@
 Handles the chat completion request for Databricks
 """
 
-from typing import Any, Callable, Literal, Optional, Tuple, Union
+from typing import Any, Callable, List, Literal, Optional, Tuple, Union, cast
 
 from httpx._config import Timeout
 
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import CustomStreamingDecoder
 from litellm.utils import ModelResponse
 
@@ -44,7 +45,9 @@ class DatabricksChatCompletion(OpenAILikeChatHandler, DatabricksBase):
         streaming_decoder: Optional[CustomStreamingDecoder] = None,
         fake_stream: bool = False,
     ):
-        messages = DatabricksConfig()._transform_messages(messages)  # type: ignore
+        messages = DatabricksConfig()._transform_messages(
+            messages=cast(List[AllMessageValues], messages), model=model
+        )
 
         api_base, headers = self.databricks_validate_environment(
             api_base=api_base,
             api_key=api_key,


@@ -7,14 +7,14 @@ from typing import List, Optional, Union
 
 from pydantic import BaseModel
 
-from litellm.types.llms.openai import AllMessageValues
-from litellm.types.utils import ProviderField
-
-from ...openai_like.chat.transformation import OpenAILikeChatConfig
 from litellm.litellm_core_utils.prompt_templates.common_utils import (
     handle_messages_with_content_list_to_str_conversion,
     strip_name_from_messages,
 )
+from litellm.types.llms.openai import AllMessageValues
+from litellm.types.utils import ProviderField
+
+from ...openai_like.chat.transformation import OpenAILikeChatConfig
 
 
 class DatabricksConfig(OpenAILikeChatConfig):
@@ -86,7 +86,7 @@ class DatabricksConfig(OpenAILikeChatConfig):
         return False
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         """
         Databricks does not support:
@@ -102,4 +102,4 @@ class DatabricksConfig(OpenAILikeChatConfig):
             new_messages.append(_message)
         new_messages = handle_messages_with_content_list_to_str_conversion(new_messages)
         new_messages = strip_name_from_messages(new_messages)
-        return super()._transform_messages(new_messages)
+        return super()._transform_messages(messages=new_messages, model=model)


@@ -8,26 +8,26 @@ from typing import List, Optional, Tuple, Union
 
 from pydantic import BaseModel
 
 import litellm
+from litellm.litellm_core_utils.prompt_templates.common_utils import (
+    handle_messages_with_content_list_to_str_conversion,
+)
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage
 
 from ....utils import _remove_additional_properties, _remove_strict_from_schema
 from ...openai.chat.gpt_transformation import OpenAIGPTConfig
-from litellm.litellm_core_utils.prompt_templates.common_utils import (
-    handle_messages_with_content_list_to_str_conversion,
-)
 
 
 class DeepSeekChatConfig(OpenAIGPTConfig):
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         """
         DeepSeek does not support content in list format.
         """
         messages = handle_messages_with_content_list_to_str_conversion(messages)
-        return super()._transform_messages(messages)
+        return super()._transform_messages(messages=messages, model=model)
 
     def _get_openai_compatible_provider_info(
         self, api_base: Optional[str], api_key: Optional[str]


@@ -2,11 +2,12 @@
 Handles the chat completion request for groq
 """
 
-from typing import Any, Callable, Optional, Union
+from typing import Any, Callable, List, Optional, Union, cast
 
 from httpx._config import Timeout
 
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import CustomStreamingDecoder
 from litellm.utils import ModelResponse
 
@@ -42,7 +43,9 @@ class GroqChatCompletion(OpenAILikeChatHandler):
         streaming_decoder: Optional[CustomStreamingDecoder] = None,
         fake_stream: bool = False,
     ):
-        messages = GroqChatConfig()._transform_messages(messages)  # type: ignore
+        messages = GroqChatConfig()._transform_messages(
+            messages=cast(List[AllMessageValues], messages), model=model
+        )
 
         if optional_params.get("stream") is True:
             fake_stream = GroqChatConfig()._should_fake_stream(optional_params)


@@ -61,7 +61,7 @@ class GroqChatConfig(OpenAIGPTConfig):
     def get_config(cls):
         return super().get_config()
 
-    def _transform_messages(self, messages: List[AllMessageValues]) -> List:
+    def _transform_messages(self, messages: List[AllMessageValues], model: str) -> List:
         for idx, message in enumerate(messages):
             """
             1. Don't pass 'null' function_call assistant message to groq - https://github.com/BerriAI/litellm/issues/5839


@@ -369,12 +369,6 @@ class HuggingfaceChatConfig(BaseConfig):
             headers = {**headers, **default_headers}
         return headers
 
-    def _transform_messages(
-        self,
-        messages: List[AllMessageValues],
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:


@@ -9,11 +9,11 @@ Docs - https://docs.mistral.ai/api/
 import types
 from typing import List, Literal, Optional, Tuple, Union
 
-from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
 from litellm.litellm_core_utils.prompt_templates.common_utils import (
     handle_messages_with_content_list_to_str_conversion,
     strip_none_values_from_message,
 )
+from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import AllMessageValues
 
@@ -148,7 +148,7 @@ class MistralConfig(OpenAIGPTConfig):
         return api_base, dynamic_api_key
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         """
         - handles scenario where content is list and not string


@@ -23,6 +23,7 @@ from litellm.types.llms.openai import (
 from litellm.types.utils import (
     GenericStreamingChunk,
     ModelInfo,
+    ModelInfoBase,
     ModelResponse,
     ProviderField,
     StreamingChoices,
@@ -198,7 +199,7 @@ class OllamaConfig(BaseConfig):
                 return v
         return None
 
-    def get_model_info(self, model: str) -> ModelInfo:
+    def get_model_info(self, model: str) -> ModelInfoBase:
        """
        curl http://localhost:11434/api/show -d '{
          "name": "mistral"
@@ -222,11 +223,10 @@ class OllamaConfig(BaseConfig):
 
         _max_tokens: Optional[int] = self._get_max_tokens(model_info)
 
-        return ModelInfo(
+        return ModelInfoBase(
             key=model,
             litellm_provider="ollama",
             mode="chat",
-            supported_openai_params=self.get_supported_openai_params(model=model),
             supports_function_calling=self._supports_function_calling(model_info),
             input_cost_per_token=0.0,
             output_cost_per_token=0.0,
@@ -235,11 +235,6 @@ class OllamaConfig(BaseConfig):
             max_output_tokens=_max_tokens,
         )
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, Headers]
     ) -> BaseLLMException:


@@ -23,11 +23,6 @@ else:
 
 class OobaboogaConfig(OpenAIGPTConfig):
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self,
         error_message: str,


@@ -164,7 +164,7 @@ class OpenAIGPTConfig(BaseConfig):
         )
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         return messages


@@ -15,7 +15,14 @@ import types
 from typing import Any, List, Optional, Union
 
 import litellm
+from litellm import verbose_logger
+from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage
+from litellm.utils import (
+    supports_function_calling,
+    supports_response_schema,
+    supports_system_messages,
+)
 
 from .gpt_transformation import OpenAIGPTConfig
 
@@ -29,6 +36,15 @@ class OpenAIO1Config(OpenAIGPTConfig):
     def get_config(cls):
         return super().get_config()
 
+    def should_fake_stream(
+        self, model: str, custom_llm_provider: Optional[str] = None
+    ) -> bool:
+        supported_stream_models = ["o1-mini", "o1-preview"]
+        for supported_model in supported_stream_models:
+            if supported_model in model:
+                return False
+        return True
+
     def get_supported_openai_params(self, model: str) -> list:
         """
         Get the supported OpenAI params for the given model
@@ -38,21 +54,37 @@ class OpenAIO1Config(OpenAIGPTConfig):
         all_openai_params = super().get_supported_openai_params(model=model)
         non_supported_params = [
             "logprobs",
-            "tools",
-            "tool_choice",
-            "parallel_tool_calls",
-            "function_call",
-            "functions",
             "top_p",
-            "n",
             "presence_penalty",
             "frequency_penalty",
             "top_logprobs",
-            "response_format",
-            "stop",
-            "stream_options",
         ]
 
+        try:
+            model, custom_llm_provider, api_base, api_key = get_llm_provider(
+                model=model
+            )
+        except Exception:
+            verbose_logger.debug(
+                f"Unable to infer model provider for model={model}, defaulting to openai for o1 supported param check"
+            )
+            custom_llm_provider = "openai"
+
+        _supports_function_calling = supports_function_calling(
+            model, custom_llm_provider
+        )
+        _supports_response_schema = supports_response_schema(model, custom_llm_provider)
+
+        if not _supports_function_calling:
+            non_supported_params.append("tools")
+            non_supported_params.append("tool_choice")
+            non_supported_params.append("parallel_tool_calls")
+            non_supported_params.append("function_call")
+            non_supported_params.append("functions")
+
+        if not _supports_response_schema:
+            non_supported_params.append("response_format")
+
         return [
             param for param in all_openai_params if param not in non_supported_params
         ]
@@ -95,16 +127,16 @@ class OpenAIO1Config(OpenAIGPTConfig):
         return False
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         """
         Handles limitations of O-1 model family.
         - modalities: image => drop param (if user opts in to dropping param)
         - role: system ==> translate to role 'user'
         """
+        _supports_system_messages = supports_system_messages(model, "openai")
         for i, message in enumerate(messages):
-            if message["role"] == "system":
+            if message["role"] == "system" and not _supports_system_messages:
                 new_message = ChatCompletionUserMessage(
                     content=message["content"], role="user"
                 )


@@ -33,6 +33,7 @@ from litellm.litellm_core_utils.prompt_templates.factory import (
     prompt_factory,
 )
 from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
+from litellm.llms.bedrock.chat.invoke_handler import MockResponseIterator
 from litellm.llms.custom_httpx.http_handler import _DEFAULT_TTL_FOR_HTTPX_CLIENTS
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.utils import (
@@ -198,7 +199,7 @@ class OpenAIConfig(BaseConfig):
         return optional_params
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         return messages
 
@@ -410,6 +411,24 @@ class OpenAIChatCompletion(BaseLLM):
             else:
                 raise e
 
+    def mock_streaming(
+        self,
+        response: ModelResponse,
+        logging_obj: LiteLLMLoggingObj,
+        model: str,
+        stream_options: Optional[dict] = None,
+    ) -> CustomStreamWrapper:
+        completion_stream = MockResponseIterator(model_response=response)
+        streaming_response = CustomStreamWrapper(
+            completion_stream=completion_stream,
+            model=model,
+            custom_llm_provider="openai",
+            logging_obj=logging_obj,
+            stream_options=stream_options,
+        )
+
+        return streaming_response
+
     def completion(  # type: ignore # noqa: PLR0915
         self,
         model_response: ModelResponse,
@@ -433,8 +452,21 @@ class OpenAIChatCompletion(BaseLLM):
     ):
         super().completion()
         try:
+            fake_stream: bool = False
+            if custom_llm_provider is not None and model is not None:
+                provider_config = ProviderConfigManager.get_provider_chat_config(
+                    model=model, provider=LlmProviders(custom_llm_provider)
+                )
+                fake_stream = provider_config.should_fake_stream(
+                    model=model, custom_llm_provider=custom_llm_provider
+                )
+            inference_params = optional_params.copy()
+            stream_options: Optional[dict] = inference_params.pop(
+                "stream_options", None
+            )
+            stream: Optional[bool] = inference_params.pop("stream", False)
             if headers:
-                optional_params["extra_headers"] = headers
+                inference_params["extra_headers"] = headers
             if model is None or messages is None:
                 raise OpenAIError(status_code=422, message="Missing model or messages")
 
@@ -456,7 +488,9 @@ class OpenAIChatCompletion(BaseLLM):
             if isinstance(provider_config, OpenAIGPTConfig) or isinstance(
                 provider_config, OpenAIConfig
             ):
-                messages = provider_config._transform_messages(messages)
+                messages = provider_config._transform_messages(
+                    messages=messages, model=model
+                )
 
             for _ in range(
                 2
@@ -464,7 +498,7 @@
                 data = OpenAIConfig().transform_request(
                     model=model,
                     messages=messages,
-                    optional_params=optional_params,
+                    optional_params=inference_params,
                     litellm_params=litellm_params,
                     headers=headers or {},
                 )
@@ -472,7 +506,7 @@
                 try:
                     max_retries = data.pop("max_retries", 2)
                     if acompletion is True:
-                        if optional_params.get("stream", False):
+                        if stream is True and fake_stream is False:
                             return self.async_streaming(
                                 logging_obj=logging_obj,
                                 headers=headers,
@@ -485,11 +519,13 @@
                                 max_retries=max_retries,
                                 organization=organization,
                                 drop_params=drop_params,
+                                stream_options=stream_options,
                             )
                         else:
                             return self.acompletion(
                                 data=data,
                                 headers=headers,
+                                model=model,
                                 logging_obj=logging_obj,
                                 model_response=model_response,
                                 api_base=api_base,
@@ -499,8 +535,9 @@
                                 max_retries=max_retries,
                                 organization=organization,
                                 drop_params=drop_params,
+                                fake_stream=fake_stream,
                             )
-                    elif optional_params.get("stream", False):
+                    elif stream is True and fake_stream is False:
                         return self.streaming(
                             logging_obj=logging_obj,
                             headers=headers,
@@ -512,6 +549,7 @@
                             client=client,
                             max_retries=max_retries,
                             organization=organization,
+                            stream_options=stream_options,
                         )
                     else:
                         if not isinstance(max_retries, int):
@@ -557,16 +595,26 @@
                             original_response=stringified_response,
                             additional_args={"complete_input_dict": data},
                         )
-                        return convert_to_model_response_object(
+
+                        final_response_obj = convert_to_model_response_object(
                             response_object=stringified_response,
                             model_response_object=model_response,
                             _response_headers=headers,
                         )
+
+                        if fake_stream is True:
+                            return self.mock_streaming(
+                                response=cast(ModelResponse, final_response_obj),
+                                logging_obj=logging_obj,
+                                model=model,
+                                stream_options=stream_options,
+                            )
+
+                        return final_response_obj
                 except openai.UnprocessableEntityError as e:
                     ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
                     if litellm.drop_params is True or drop_params is True:
-                        optional_params = drop_params_from_unprocessable_entity_error(
-                            e, optional_params
+                        inference_params = drop_params_from_unprocessable_entity_error(
+                            e, inference_params
                         )
                     else:
                         raise e
@@ -623,6 +671,7 @@
     async def acompletion(
         self,
         data: dict,
+        model: str,
         model_response: ModelResponse,
         logging_obj: LiteLLMLoggingObj,
         timeout: Union[float, httpx.Timeout],
@@ -633,6 +682,8 @@
         max_retries=None,
         headers=None,
         drop_params: Optional[bool] = None,
+        stream_options: Optional[dict] = None,
+        fake_stream: bool = False,
     ):
         response = None
         for _ in range(
@@ -667,6 +718,7 @@
                     openai_aclient=openai_aclient, data=data, timeout=timeout
                 )
                 stringified_response = response.model_dump()
+
                 logging_obj.post_call(
                     input=data["messages"],
                     api_key=api_key,
@@ -674,12 +726,22 @@
                     additional_args={"complete_input_dict": data},
                 )
                 logging_obj.model_call_details["response_headers"] = headers
-                return convert_to_model_response_object(
+                final_response_obj = convert_to_model_response_object(
                     response_object=stringified_response,
                     model_response_object=model_response,
                    hidden_params={"headers": headers},
                    _response_headers=headers,
                 )
+
+                if fake_stream is True:
+                    return self.mock_streaming(
+                        response=cast(ModelResponse, final_response_obj),
+                        logging_obj=logging_obj,
+                        model=model,
+                        stream_options=stream_options,
+                    )
+
+                return final_response_obj
             except openai.UnprocessableEntityError as e:
                 ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
                 if litellm.drop_params is True or drop_params is True:
@@ -710,7 +772,11 @@
         client=None,
         max_retries=None,
         headers=None,
+        stream_options: Optional[dict] = None,
     ):
+        data["stream"] = True
+        if stream_options is not None:
+            data["stream_options"] = stream_options
         openai_client: OpenAI = self._get_openai_client(  # type: ignore
             is_async=False,
             api_key=api_key,
@@ -761,8 +827,12 @@
         max_retries=None,
         headers=None,
         drop_params: Optional[bool] = None,
+        stream_options: Optional[dict] = None,
     ):
         response = None
+        data["stream"] = True
+        if stream_options is not None:
+            data["stream_options"] = stream_options
         for _ in range(2):
             try:
                 openai_aclient: AsyncOpenAI = self._get_openai_client(  # type: ignore


@@ -284,7 +284,9 @@ class OpenAILikeChatHandler(OpenAILikeBase):
             if isinstance(provider_config, OpenAIGPTConfig) or isinstance(
                 provider_config, OpenAIConfig
             ):
-                messages = provider_config._transform_messages(messages)
+                messages = provider_config._transform_messages(
+                    messages=messages, model=model
+                )
 
         data = {
             "model": model,


@@ -139,11 +139,6 @@ class PredibaseConfig(BaseConfig):
             "Predibase transformation currently done in handler.py. Need to migrate to this file."
         )
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        return messages
-
     def transform_request(
         self,
         model: str,


@@ -130,11 +130,6 @@ class ReplicateConfig(BaseConfig):
             return split_model[1]
         return model
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:


@@ -57,12 +57,6 @@ class SagemakerConfig(BaseConfig):
     def get_config(cls):
         return super().get_config()
 
-    def _transform_messages(
-        self,
-        messages: List[AllMessageValues],
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, Headers]
     ) -> BaseLLMException:


@@ -240,12 +240,6 @@ class IBMWatsonXAIConfig(BaseConfig):
             "us-south",
         ]
 
-    def _transform_messages(
-        self,
-        messages: List[AllMessageValues],
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[Dict, httpx.Headers]
     ) -> BaseLLMException:


@@ -13,7 +13,8 @@
         "supports_audio_input": true,
         "supports_audio_output": true,
         "supports_prompt_caching": true,
-        "supports_response_schema": true
+        "supports_response_schema": true,
+        "supports_system_messages": true
     },
     "sambanova/Meta-Llama-3.1-8B-Instruct": {
         "max_tokens": 16000,
@@ -94,7 +95,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o": {
         "max_tokens": 16384,
@@ -109,7 +111,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4o-audio-preview": {
        "max_tokens": 16384,
@@ -124,7 +127,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
    },
    "gpt-4o-audio-preview-2024-10-01": {
        "max_tokens": 16384,
@@ -139,7 +143,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
    },
    "gpt-4o-mini-audio-preview-2024-12-17": {
        "max_tokens": 16384,
@@ -154,7 +159,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
    },
    "gpt-4o-mini": {
        "max_tokens": 16384,
@@ -169,7 +175,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4o-mini-2024-07-18": {
        "max_tokens": 16384,
@@ -184,7 +191,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "o1": {
        "max_tokens": 100000,
@@ -198,7 +206,9 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": false,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true,
+        "supports_response_schema": true
    },
    "o1-mini": {
        "max_tokens": 65536,
@@ -209,8 +219,6 @@
         "cache_read_input_token_cost": 0.0000015,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -223,8 +231,6 @@
         "cache_read_input_token_cost": 0.0000015,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -237,8 +243,6 @@
         "cache_read_input_token_cost": 0.0000075,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -251,8 +255,6 @@
         "cache_read_input_token_cost": 0.0000075,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -268,7 +270,9 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": false,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true,
+        "supports_response_schema": true
    },
    "chatgpt-4o-latest": {
        "max_tokens": 4096,
@@ -281,7 +285,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4o-2024-05-13": {
        "max_tokens": 4096,
@@ -294,7 +299,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4o-2024-08-06": {
        "max_tokens": 16384,
@@ -309,7 +315,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4o-2024-11-20": {
        "max_tokens": 16384,
@@ -324,7 +331,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4o-realtime-preview-2024-10-01": {
        "max_tokens": 4096,
@@ -341,7 +349,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
    },
    "gpt-4o-realtime-preview": {
        "max_tokens": 4096,
@@ -357,7 +366,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
    },
    "gpt-4o-realtime-preview-2024-12-17": {
        "max_tokens": 4096,
@@ -373,7 +383,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
    },
    "gpt-4o-mini-realtime-preview": {
        "max_tokens": 4096,
@@ -390,7 +401,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
    },
    "gpt-4o-mini-realtime-preview-2024-12-17": {
        "max_tokens": 4096,
@@ -407,7 +419,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
    },
    "gpt-4-turbo-preview": {
        "max_tokens": 4096,
@@ -419,7 +432,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4-0314": {
        "max_tokens": 4096,
@@ -429,7 +443,8 @@
         "output_cost_per_token": 0.00006,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4-0613": {
        "max_tokens": 4096,
@@ -440,7 +455,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4-32k": {
        "max_tokens": 4096,
@@ -450,7 +466,8 @@
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4-32k-0314": {
        "max_tokens": 4096,
@@ -460,7 +477,8 @@
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4-32k-0613": {
        "max_tokens": 4096,
@@ -470,7 +488,8 @@
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4-turbo": {
        "max_tokens": 4096,
@@ -483,7 +502,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4-turbo-2024-04-09": {
        "max_tokens": 4096,
@@ -496,7 +516,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4-1106-preview": {
        "max_tokens": 4096,
@@ -508,7 +529,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4-0125-preview": {
        "max_tokens": 4096,
@@ -520,7 +542,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4-vision-preview": {
        "max_tokens": 4096,
@@ -531,7 +554,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-4-1106-vision-preview": {
        "max_tokens": 4096,
@@ -542,7 +566,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-3.5-turbo": {
        "max_tokens": 4097,
@@ -553,7 +578,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-3.5-turbo-0301": {
        "max_tokens": 4097,
@@ -563,7 +589,8 @@
         "output_cost_per_token": 0.000002,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-3.5-turbo-0613": {
        "max_tokens": 4097,
@@ -574,7 +601,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-3.5-turbo-1106": {
        "max_tokens": 16385,
@@ -586,7 +614,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-3.5-turbo-0125": {
        "max_tokens": 16385,
@@ -598,7 +627,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-3.5-turbo-16k": {
        "max_tokens": 16385,
@@ -608,7 +638,8 @@
         "output_cost_per_token": 0.000004,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "gpt-3.5-turbo-16k-0613": {
        "max_tokens": 16385,
@@ -618,7 +649,8 @@
         "output_cost_per_token": 0.000004,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "ft:gpt-3.5-turbo": {
        "max_tokens": 4096,
@@ -627,7 +659,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000006,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_system_messages": true
    },
    "ft:gpt-3.5-turbo-0125": {
        "max_tokens": 4096,
@@ -636,7 +669,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000006,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_system_messages": true
    },
    "ft:gpt-3.5-turbo-1106": {
        "max_tokens": 4096,
@@ -645,7 +679,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000006,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_system_messages": true
    },
    "ft:gpt-3.5-turbo-0613": {
        "max_tokens": 4096,
@@ -654,7 +689,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000006,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_system_messages": true
    },
    "ft:gpt-4-0613": {
        "max_tokens": 4096,
@@ -665,7 +701,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing"
+        "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing",
+        "supports_system_messages": true
    },
    "ft:gpt-4o-2024-08-06": {
        "max_tokens": 16384,
@@ -678,7 +715,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_system_messages": true
    },
    "ft:gpt-4o-2024-11-20": {
        "max_tokens": 16384,
@@ -693,7 +731,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "ft:gpt-4o-mini-2024-07-18": {
        "max_tokens": 16384,
@@ -708,7 +747,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
    },
    "ft:davinci-002": {
        "max_tokens": 16384,
@@ -3166,6 +3206,42 @@
         "supports_function_calling": true,
         "supports_vision": true,
         "supports_response_schema": true,
+        "supports_audio_output": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
+    },
+    "gemini/gemini-2.0-flash-exp": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0,
+        "input_cost_per_video_per_second": 0,
+        "input_cost_per_audio_per_second": 0,
+        "input_cost_per_token": 0,
+        "input_cost_per_character": 0,
+        "input_cost_per_token_above_128k_tokens": 0,
+        "input_cost_per_character_above_128k_tokens": 0,
+        "input_cost_per_image_above_128k_tokens": 0,
+        "input_cost_per_video_per_second_above_128k_tokens": 0,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0,
+        "output_cost_per_token": 0,
+        "output_cost_per_character": 0,
+        "output_cost_per_token_above_128k_tokens": 0,
+        "output_cost_per_character_above_128k_tokens": 0,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": true,
+        "tpm": 4000000,
+        "rpm": 10,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
     },
     "vertex_ai/claude-3-sonnet": {

View file
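As a quick check of the `gemini/gemini-2.0-flash-exp` entry added in the model map above, the values can be read back through `litellm.get_model_info`. A minimal sketch; the printed values are taken from that map entry and assume the model map bundled with this change:

```python
# Minimal sketch: read the new Google AI Studio entry back out of the model map.
import litellm

info = litellm.get_model_info(model="gemini/gemini-2.0-flash-exp")
print(info["max_input_tokens"])       # 1048576
print(info["supports_audio_output"])  # True
print(info["tpm"], info["rpm"])       # 4000000 10
```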

@ -74,11 +74,7 @@ class ProviderField(TypedDict):
field_value: str field_value: str
class ModelInfo(TypedDict, total=False): class ModelInfoBase(TypedDict, total=False):
"""
Model info for a given model, this is information found in litellm.model_prices_and_context_window.json
"""
key: Required[str] # the key in litellm.model_cost which is returned key: Required[str] # the key in litellm.model_cost which is returned
max_tokens: Required[Optional[int]] max_tokens: Required[Optional[int]]
@ -119,7 +115,6 @@ class ModelInfo(TypedDict, total=False):
"completion", "embedding", "image_generation", "chat", "audio_transcription" "completion", "embedding", "image_generation", "chat", "audio_transcription"
] ]
] ]
supported_openai_params: Required[Optional[List[str]]]
supports_system_messages: Optional[bool] supports_system_messages: Optional[bool]
supports_response_schema: Optional[bool] supports_response_schema: Optional[bool]
supports_vision: Optional[bool] supports_vision: Optional[bool]
@ -133,6 +128,14 @@ class ModelInfo(TypedDict, total=False):
rpm: Optional[int] rpm: Optional[int]
class ModelInfo(ModelInfoBase, total=False):
"""
Model info for a given model; this is the information found in litellm.model_prices_and_context_window.json
"""
supported_openai_params: Required[Optional[List[str]]]
class GenericStreamingChunk(TypedDict, total=False): class GenericStreamingChunk(TypedDict, total=False):
text: Required[str] text: Required[str]
tool_use: Optional[ChatCompletionToolCallChunk] tool_use: Optional[ChatCompletionToolCallChunk]

View file

@ -132,6 +132,7 @@ from litellm.types.utils import (
LlmProviders, LlmProviders,
Message, Message,
ModelInfo, ModelInfo,
ModelInfoBase,
ModelResponse, ModelResponse,
ModelResponseStream, ModelResponseStream,
ProviderField, ProviderField,
@ -1645,17 +1646,11 @@ def supports_system_messages(model: str, custom_llm_provider: Optional[str]) ->
Raises: Raises:
Exception: If the given model is not found in model_prices_and_context_window.json. Exception: If the given model is not found in model_prices_and_context_window.json.
""" """
try: return _supports_factory(
model_info = litellm.get_model_info( model=model,
model=model, custom_llm_provider=custom_llm_provider custom_llm_provider=custom_llm_provider,
) key="supports_system_messages",
if model_info.get("supports_system_messages", False) is True: )
return True
return False
except Exception:
raise Exception(
f"Model not supports system messages. You passed model={model}, custom_llm_provider={custom_llm_provider}."
)
def supports_response_schema(model: str, custom_llm_provider: Optional[str]) -> bool: def supports_response_schema(model: str, custom_llm_provider: Optional[str]) -> bool:
@ -1684,25 +1679,11 @@ def supports_response_schema(model: str, custom_llm_provider: Optional[str]) ->
if custom_llm_provider in PROVIDERS_GLOBALLY_SUPPORT_RESPONSE_SCHEMA: if custom_llm_provider in PROVIDERS_GLOBALLY_SUPPORT_RESPONSE_SCHEMA:
return True return True
try: return _supports_factory(
## GET MODEL INFO model=model,
model_info = litellm.get_model_info( custom_llm_provider=custom_llm_provider,
model=model, custom_llm_provider=custom_llm_provider key="supports_response_schema",
) )
if model_info.get("supports_response_schema", False) is True:
return True
except Exception:
## check if provider supports response schema globally
supported_params = get_supported_openai_params(
model=model,
custom_llm_provider=custom_llm_provider,
request_type="chat_completion",
)
if supported_params is not None and "response_schema" in supported_params:
return True
return False
def supports_function_calling( def supports_function_calling(
@ -1721,23 +1702,11 @@ def supports_function_calling(
Raises: Raises:
Exception: If the given model is not found or there's an error in retrieval. Exception: If the given model is not found or there's an error in retrieval.
""" """
try: return _supports_factory(
model, custom_llm_provider, _, _ = litellm.get_llm_provider( model=model,
model=model, custom_llm_provider=custom_llm_provider custom_llm_provider=custom_llm_provider,
) key="supports_function_calling",
)
## CHECK IF MODEL SUPPORTS FUNCTION CALLING ##
model_info = litellm.get_model_info(
model=model, custom_llm_provider=custom_llm_provider
)
if model_info.get("supports_function_calling", False) is True:
return True
return False
except Exception as e:
raise Exception(
f"Model not found or error in checking function calling support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}"
)
def _supports_factory(model: str, custom_llm_provider: Optional[str], key: str) -> bool: def _supports_factory(model: str, custom_llm_provider: Optional[str], key: str) -> bool:
@ -1759,7 +1728,7 @@ def _supports_factory(model: str, custom_llm_provider: Optional[str], key: str)
model=model, custom_llm_provider=custom_llm_provider model=model, custom_llm_provider=custom_llm_provider
) )
model_info = litellm.get_model_info( model_info = _get_model_info_helper(
model=model, custom_llm_provider=custom_llm_provider model=model, custom_llm_provider=custom_llm_provider
) )
@ -1767,9 +1736,10 @@ def _supports_factory(model: str, custom_llm_provider: Optional[str], key: str)
return True return True
return False return False
except Exception as e: except Exception as e:
raise Exception( verbose_logger.debug(
f"Model not found or error in checking {key} support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}" f"Model not found or error in checking {key} support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}"
) )
return False
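Since `_supports_factory` now logs at debug level and returns `False` instead of raising, the public `supports_*` helpers can be called speculatively. A short sketch of the intended behavior; the results assume the model map flags set in this change, and the last model name is deliberately made up:

```python
# Sketch: capability probes degrade to False (with a debug log) when the
# model can't be found, instead of raising.
from litellm.utils import supports_system_messages

print(supports_system_messages("o1", "openai"))       # True, flag set in the map
print(supports_system_messages("o1-mini", "openai"))  # False, flag not set for o1-mini
print(supports_system_messages("not-a-real-model", "openai"))  # False, no exception
```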
def supports_audio_input(model: str, custom_llm_provider: Optional[str] = None) -> bool: def supports_audio_input(model: str, custom_llm_provider: Optional[str] = None) -> bool:
@ -4196,9 +4166,239 @@ def _get_potential_model_names(
) )
def get_model_info( # noqa: PLR0915 def _get_max_position_embeddings(model_name: str) -> Optional[int]:
# Construct the URL for the config.json file
config_url = f"https://huggingface.co/{model_name}/raw/main/config.json"
try:
# Make the HTTP request to get the raw JSON file
response = litellm.module_level_client.get(config_url)
response.raise_for_status() # Raise an exception for bad responses (4xx or 5xx)
# Parse the JSON response
config_json = response.json()
# Extract and return the max_position_embeddings
max_position_embeddings = config_json.get("max_position_embeddings")
if max_position_embeddings is not None:
return max_position_embeddings
else:
return None
except Exception:
return None
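For HuggingFace models that aren't in the cost map, this helper pulls `max_position_embeddings` from the repo's `config.json`, and the value becomes the returned `max_tokens`. A rough usage sketch of the internal helper; the repo id is illustrative and network access to huggingface.co is assumed:

```python
# Rough sketch of the internal helper above; it returns None on any failure.
from litellm.utils import _get_max_position_embeddings

ctx = _get_max_position_embeddings("mistralai/Mistral-7B-Instruct-v0.2")
print(ctx)  # e.g. 32768, or None if config.json can't be fetched
```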
def _get_model_info_helper( # noqa: PLR0915
model: str, custom_llm_provider: Optional[str] = None model: str, custom_llm_provider: Optional[str] = None
) -> ModelInfo: ) -> ModelInfoBase:
"""
Helper for 'get_model_info'. Separated out to avoid an infinite loop when resolving 'supported_openai_params'.
"""
try:
azure_llms = {**litellm.azure_llms, **litellm.azure_embedding_models}
if model in azure_llms:
model = azure_llms[model]
if custom_llm_provider is not None and custom_llm_provider == "vertex_ai_beta":
custom_llm_provider = "vertex_ai"
if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
if "meta/" + model in litellm.vertex_llama3_models:
model = "meta/" + model
elif model + "@latest" in litellm.vertex_mistral_models:
model = model + "@latest"
elif model + "@latest" in litellm.vertex_ai_ai21_models:
model = model + "@latest"
##########################
potential_model_names = _get_potential_model_names(
model=model, custom_llm_provider=custom_llm_provider
)
combined_model_name = potential_model_names["combined_model_name"]
stripped_model_name = potential_model_names["stripped_model_name"]
combined_stripped_model_name = potential_model_names[
"combined_stripped_model_name"
]
split_model = potential_model_names["split_model"]
custom_llm_provider = potential_model_names["custom_llm_provider"]
#########################
if custom_llm_provider == "huggingface":
max_tokens = _get_max_position_embeddings(model_name=model)
return ModelInfoBase(
key=model,
max_tokens=max_tokens, # type: ignore
max_input_tokens=None,
max_output_tokens=None,
input_cost_per_token=0,
output_cost_per_token=0,
litellm_provider="huggingface",
mode="chat",
supports_system_messages=None,
supports_response_schema=None,
supports_function_calling=None,
supports_assistant_prefill=None,
supports_prompt_caching=None,
supports_pdf_input=None,
)
elif custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
return litellm.OllamaConfig().get_model_info(model)
else:
"""
Check if: (in order of specificity)
1. 'custom_llm_provider/model' in litellm.model_cost. Checks "groq/llama3-8b-8192" if model="llama3-8b-8192" and custom_llm_provider="groq"
2. 'model' in litellm.model_cost. Checks "gemini-1.5-pro-002" in litellm.model_cost if model="gemini-1.5-pro-002" and custom_llm_provider=None
3. 'combined_stripped_model_name' in litellm.model_cost. Checks if 'gemini/gemini-1.5-flash' in model map, if 'gemini/gemini-1.5-flash-001' given.
4. 'stripped_model_name' in litellm.model_cost. Checks if 'ft:gpt-3.5-turbo' in model map, if 'ft:gpt-3.5-turbo:my-org:custom_suffix:id' given.
5. 'split_model' in litellm.model_cost. Checks "llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192"
"""
_model_info: Optional[Dict[str, Any]] = None
key: Optional[str] = None
if combined_model_name in litellm.model_cost:
key = combined_model_name
_model_info = _get_model_info_from_model_cost(key=key)
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and model in litellm.model_cost:
key = model
_model_info = _get_model_info_from_model_cost(key=key)
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if (
_model_info is None
and combined_stripped_model_name in litellm.model_cost
):
key = combined_stripped_model_name
_model_info = _get_model_info_from_model_cost(key=key)
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and stripped_model_name in litellm.model_cost:
key = stripped_model_name
_model_info = _get_model_info_from_model_cost(key=key)
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and split_model in litellm.model_cost:
key = split_model
_model_info = _get_model_info_from_model_cost(key=key)
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None or key is None:
raise ValueError(
"This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
)
## PROVIDER-SPECIFIC INFORMATION
if custom_llm_provider == "predibase":
_model_info["supports_response_schema"] = True
_input_cost_per_token: Optional[float] = _model_info.get(
"input_cost_per_token"
)
if _input_cost_per_token is None:
# default value to 0, be noisy about this
verbose_logger.debug(
"model={}, custom_llm_provider={} has no input_cost_per_token in model_cost_map. Defaulting to 0.".format(
model, custom_llm_provider
)
)
_input_cost_per_token = 0
_output_cost_per_token: Optional[float] = _model_info.get(
"output_cost_per_token"
)
if _output_cost_per_token is None:
# default value to 0, be noisy about this
verbose_logger.debug(
"model={}, custom_llm_provider={} has no output_cost_per_token in model_cost_map. Defaulting to 0.".format(
model, custom_llm_provider
)
)
_output_cost_per_token = 0
return ModelInfoBase(
key=key,
max_tokens=_model_info.get("max_tokens", None),
max_input_tokens=_model_info.get("max_input_tokens", None),
max_output_tokens=_model_info.get("max_output_tokens", None),
input_cost_per_token=_input_cost_per_token,
cache_creation_input_token_cost=_model_info.get(
"cache_creation_input_token_cost", None
),
cache_read_input_token_cost=_model_info.get(
"cache_read_input_token_cost", None
),
input_cost_per_character=_model_info.get(
"input_cost_per_character", None
),
input_cost_per_token_above_128k_tokens=_model_info.get(
"input_cost_per_token_above_128k_tokens", None
),
input_cost_per_query=_model_info.get("input_cost_per_query", None),
input_cost_per_second=_model_info.get("input_cost_per_second", None),
input_cost_per_audio_token=_model_info.get(
"input_cost_per_audio_token", None
),
output_cost_per_token=_output_cost_per_token,
output_cost_per_audio_token=_model_info.get(
"output_cost_per_audio_token", None
),
output_cost_per_character=_model_info.get(
"output_cost_per_character", None
),
output_cost_per_token_above_128k_tokens=_model_info.get(
"output_cost_per_token_above_128k_tokens", None
),
output_cost_per_character_above_128k_tokens=_model_info.get(
"output_cost_per_character_above_128k_tokens", None
),
output_cost_per_second=_model_info.get("output_cost_per_second", None),
output_cost_per_image=_model_info.get("output_cost_per_image", None),
output_vector_size=_model_info.get("output_vector_size", None),
litellm_provider=_model_info.get(
"litellm_provider", custom_llm_provider
),
mode=_model_info.get("mode"), # type: ignore
supports_system_messages=_model_info.get(
"supports_system_messages", None
),
supports_response_schema=_model_info.get(
"supports_response_schema", None
),
supports_vision=_model_info.get("supports_vision", False),
supports_function_calling=_model_info.get(
"supports_function_calling", False
),
supports_assistant_prefill=_model_info.get(
"supports_assistant_prefill", False
),
supports_prompt_caching=_model_info.get(
"supports_prompt_caching", False
),
supports_audio_input=_model_info.get("supports_audio_input", False),
supports_audio_output=_model_info.get("supports_audio_output", False),
supports_pdf_input=_model_info.get("supports_pdf_input", False),
tpm=_model_info.get("tpm", None),
rpm=_model_info.get("rpm", None),
)
except Exception as e:
if "OllamaError" in str(e):
raise e
raise Exception(
"This model isn't mapped yet. model={}, custom_llm_provider={}. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json.".format(
model, custom_llm_provider
)
)
def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
""" """
Get a dict for the maximum tokens (context window), input_cost_per_token, output_cost_per_token for a given model. Get a dict for the maximum tokens (context window), input_cost_per_token, output_cost_per_token for a given model.
@ -4265,241 +4465,20 @@ def get_model_info( # noqa: PLR0915
"supported_openai_params": ["temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty"] "supported_openai_params": ["temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty"]
} }
""" """
supported_openai_params: Union[List[str], None] = [] supported_openai_params = litellm.get_supported_openai_params(
model=model, custom_llm_provider=custom_llm_provider
)
def _get_max_position_embeddings(model_name): _model_info = _get_model_info_helper(
# Construct the URL for the config.json file model=model,
config_url = f"https://huggingface.co/{model_name}/raw/main/config.json" custom_llm_provider=custom_llm_provider,
)
try: returned_model_info = ModelInfo(
# Make the HTTP request to get the raw JSON file **_model_info, supported_openai_params=supported_openai_params
response = litellm.module_level_client.get(config_url) )
response.raise_for_status() # Raise an exception for bad responses (4xx or 5xx)
# Parse the JSON response return returned_model_info
config_json = response.json()
# Extract and return the max_position_embeddings
max_position_embeddings = config_json.get("max_position_embeddings")
if max_position_embeddings is not None:
return max_position_embeddings
else:
return None
except Exception:
return None
try:
azure_llms = {**litellm.azure_llms, **litellm.azure_embedding_models}
if model in azure_llms:
model = azure_llms[model]
if custom_llm_provider is not None and custom_llm_provider == "vertex_ai_beta":
custom_llm_provider = "vertex_ai"
if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
if "meta/" + model in litellm.vertex_llama3_models:
model = "meta/" + model
elif model + "@latest" in litellm.vertex_mistral_models:
model = model + "@latest"
elif model + "@latest" in litellm.vertex_ai_ai21_models:
model = model + "@latest"
##########################
potential_model_names = _get_potential_model_names(
model=model, custom_llm_provider=custom_llm_provider
)
combined_model_name = potential_model_names["combined_model_name"]
stripped_model_name = potential_model_names["stripped_model_name"]
combined_stripped_model_name = potential_model_names[
"combined_stripped_model_name"
]
split_model = potential_model_names["split_model"]
custom_llm_provider = potential_model_names["custom_llm_provider"]
#########################
supported_openai_params = litellm.get_supported_openai_params(
model=model, custom_llm_provider=custom_llm_provider
)
if custom_llm_provider == "huggingface":
max_tokens = _get_max_position_embeddings(model_name=model)
return ModelInfo(
key=model,
max_tokens=max_tokens, # type: ignore
max_input_tokens=None,
max_output_tokens=None,
input_cost_per_token=0,
output_cost_per_token=0,
litellm_provider="huggingface",
mode="chat",
supported_openai_params=supported_openai_params,
supports_system_messages=None,
supports_response_schema=None,
supports_function_calling=None,
supports_assistant_prefill=None,
supports_prompt_caching=None,
supports_pdf_input=None,
)
elif custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
return litellm.OllamaConfig().get_model_info(model)
else:
"""
Check if: (in order of specificity)
1. 'custom_llm_provider/model' in litellm.model_cost. Checks "groq/llama3-8b-8192" if model="llama3-8b-8192" and custom_llm_provider="groq"
2. 'model' in litellm.model_cost. Checks "gemini-1.5-pro-002" in litellm.model_cost if model="gemini-1.5-pro-002" and custom_llm_provider=None
3. 'combined_stripped_model_name' in litellm.model_cost. Checks if 'gemini/gemini-1.5-flash' in model map, if 'gemini/gemini-1.5-flash-001' given.
4. 'stripped_model_name' in litellm.model_cost. Checks if 'ft:gpt-3.5-turbo' in model map, if 'ft:gpt-3.5-turbo:my-org:custom_suffix:id' given.
5. 'split_model' in litellm.model_cost. Checks "llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192"
"""
_model_info: Optional[Dict[str, Any]] = None
key: Optional[str] = None
if combined_model_name in litellm.model_cost:
key = combined_model_name
_model_info = _get_model_info_from_model_cost(key=key)
_model_info["supported_openai_params"] = supported_openai_params
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and model in litellm.model_cost:
key = model
_model_info = _get_model_info_from_model_cost(key=key)
_model_info["supported_openai_params"] = supported_openai_params
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if (
_model_info is None
and combined_stripped_model_name in litellm.model_cost
):
key = combined_stripped_model_name
_model_info = _get_model_info_from_model_cost(key=key)
_model_info["supported_openai_params"] = supported_openai_params
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and stripped_model_name in litellm.model_cost:
key = stripped_model_name
_model_info = _get_model_info_from_model_cost(key=key)
_model_info["supported_openai_params"] = supported_openai_params
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and split_model in litellm.model_cost:
key = split_model
_model_info = _get_model_info_from_model_cost(key=key)
_model_info["supported_openai_params"] = supported_openai_params
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None or key is None:
raise ValueError(
"This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
)
## PROVIDER-SPECIFIC INFORMATION
if custom_llm_provider == "predibase":
_model_info["supports_response_schema"] = True
_input_cost_per_token: Optional[float] = _model_info.get(
"input_cost_per_token"
)
if _input_cost_per_token is None:
# default value to 0, be noisy about this
verbose_logger.debug(
"model={}, custom_llm_provider={} has no input_cost_per_token in model_cost_map. Defaulting to 0.".format(
model, custom_llm_provider
)
)
_input_cost_per_token = 0
_output_cost_per_token: Optional[float] = _model_info.get(
"output_cost_per_token"
)
if _output_cost_per_token is None:
# default value to 0, be noisy about this
verbose_logger.debug(
"model={}, custom_llm_provider={} has no output_cost_per_token in model_cost_map. Defaulting to 0.".format(
model, custom_llm_provider
)
)
_output_cost_per_token = 0
return ModelInfo(
key=key,
max_tokens=_model_info.get("max_tokens", None),
max_input_tokens=_model_info.get("max_input_tokens", None),
max_output_tokens=_model_info.get("max_output_tokens", None),
input_cost_per_token=_input_cost_per_token,
cache_creation_input_token_cost=_model_info.get(
"cache_creation_input_token_cost", None
),
cache_read_input_token_cost=_model_info.get(
"cache_read_input_token_cost", None
),
input_cost_per_character=_model_info.get(
"input_cost_per_character", None
),
input_cost_per_token_above_128k_tokens=_model_info.get(
"input_cost_per_token_above_128k_tokens", None
),
input_cost_per_query=_model_info.get("input_cost_per_query", None),
input_cost_per_second=_model_info.get("input_cost_per_second", None),
input_cost_per_audio_token=_model_info.get(
"input_cost_per_audio_token", None
),
output_cost_per_token=_output_cost_per_token,
output_cost_per_audio_token=_model_info.get(
"output_cost_per_audio_token", None
),
output_cost_per_character=_model_info.get(
"output_cost_per_character", None
),
output_cost_per_token_above_128k_tokens=_model_info.get(
"output_cost_per_token_above_128k_tokens", None
),
output_cost_per_character_above_128k_tokens=_model_info.get(
"output_cost_per_character_above_128k_tokens", None
),
output_cost_per_second=_model_info.get("output_cost_per_second", None),
output_cost_per_image=_model_info.get("output_cost_per_image", None),
output_vector_size=_model_info.get("output_vector_size", None),
litellm_provider=_model_info.get(
"litellm_provider", custom_llm_provider
),
mode=_model_info.get("mode"), # type: ignore
supported_openai_params=supported_openai_params,
supports_system_messages=_model_info.get(
"supports_system_messages", None
),
supports_response_schema=_model_info.get(
"supports_response_schema", None
),
supports_vision=_model_info.get("supports_vision", False),
supports_function_calling=_model_info.get(
"supports_function_calling", False
),
supports_assistant_prefill=_model_info.get(
"supports_assistant_prefill", False
),
supports_prompt_caching=_model_info.get(
"supports_prompt_caching", False
),
supports_audio_input=_model_info.get("supports_audio_input", False),
supports_audio_output=_model_info.get("supports_audio_output", False),
supports_pdf_input=_model_info.get("supports_pdf_input", False),
tpm=_model_info.get("tpm", None),
rpm=_model_info.get("rpm", None),
)
except Exception as e:
if "OllamaError" in str(e):
raise e
raise Exception(
"This model isn't mapped yet. model={}, custom_llm_provider={}. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json.".format(
model, custom_llm_provider
)
)
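Net effect of the split: `_get_model_info_helper` resolves the map entry (provider-prefixed, exact, combined-stripped, stripped, then split names, as the docstring above describes) and returns a `ModelInfoBase`, while the public `get_model_info` attaches `supported_openai_params` on top. A hedged sketch using the model names from that docstring:

```python
# Sketch: same underlying entry whether the provider is a prefix or a kwarg;
# only the public wrapper carries 'supported_openai_params'.
import litellm
from litellm.utils import _get_model_info_helper

a = litellm.get_model_info(model="groq/llama3-8b-8192")
b = litellm.get_model_info(model="llama3-8b-8192", custom_llm_provider="groq")
assert a["key"] == b["key"]
assert "supported_openai_params" in a

base = _get_model_info_helper(model="groq/llama3-8b-8192")
assert "supported_openai_params" not in base
```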
def json_schema_type(python_type_name: str): def json_schema_type(python_type_name: str):

View file

@ -13,7 +13,8 @@
"supports_audio_input": true, "supports_audio_input": true,
"supports_audio_output": true, "supports_audio_output": true,
"supports_prompt_caching": true, "supports_prompt_caching": true,
"supports_response_schema": true "supports_response_schema": true,
"supports_system_messages": true
}, },
"sambanova/Meta-Llama-3.1-8B-Instruct": { "sambanova/Meta-Llama-3.1-8B-Instruct": {
"max_tokens": 16000, "max_tokens": 16000,
@ -94,7 +95,8 @@
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_function_calling": true, "supports_function_calling": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4o": { "gpt-4o": {
"max_tokens": 16384, "max_tokens": 16384,
@ -109,7 +111,8 @@
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_response_schema": true, "supports_response_schema": true,
"supports_vision": true, "supports_vision": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4o-audio-preview": { "gpt-4o-audio-preview": {
"max_tokens": 16384, "max_tokens": 16384,
@ -124,7 +127,8 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_audio_input": true, "supports_audio_input": true,
"supports_audio_output": true "supports_audio_output": true,
"supports_system_messages": true
}, },
"gpt-4o-audio-preview-2024-10-01": { "gpt-4o-audio-preview-2024-10-01": {
"max_tokens": 16384, "max_tokens": 16384,
@ -139,7 +143,8 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_audio_input": true, "supports_audio_input": true,
"supports_audio_output": true "supports_audio_output": true,
"supports_system_messages": true
}, },
"gpt-4o-mini-audio-preview-2024-12-17": { "gpt-4o-mini-audio-preview-2024-12-17": {
"max_tokens": 16384, "max_tokens": 16384,
@ -154,7 +159,8 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_audio_input": true, "supports_audio_input": true,
"supports_audio_output": true "supports_audio_output": true,
"supports_system_messages": true
}, },
"gpt-4o-mini": { "gpt-4o-mini": {
"max_tokens": 16384, "max_tokens": 16384,
@ -169,7 +175,8 @@
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_response_schema": true, "supports_response_schema": true,
"supports_vision": true, "supports_vision": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4o-mini-2024-07-18": { "gpt-4o-mini-2024-07-18": {
"max_tokens": 16384, "max_tokens": 16384,
@ -184,7 +191,8 @@
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_response_schema": true, "supports_response_schema": true,
"supports_vision": true, "supports_vision": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"o1": { "o1": {
"max_tokens": 100000, "max_tokens": 100000,
@ -198,7 +206,9 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_vision": false, "supports_vision": false,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true,
"supports_response_schema": true
}, },
"o1-mini": { "o1-mini": {
"max_tokens": 65536, "max_tokens": 65536,
@ -209,8 +219,6 @@
"cache_read_input_token_cost": 0.0000015, "cache_read_input_token_cost": 0.0000015,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false, "supports_vision": false,
"supports_prompt_caching": true "supports_prompt_caching": true
}, },
@ -223,8 +231,6 @@
"cache_read_input_token_cost": 0.0000015, "cache_read_input_token_cost": 0.0000015,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false, "supports_vision": false,
"supports_prompt_caching": true "supports_prompt_caching": true
}, },
@ -237,8 +243,6 @@
"cache_read_input_token_cost": 0.0000075, "cache_read_input_token_cost": 0.0000075,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false, "supports_vision": false,
"supports_prompt_caching": true "supports_prompt_caching": true
}, },
@ -251,8 +255,6 @@
"cache_read_input_token_cost": 0.0000075, "cache_read_input_token_cost": 0.0000075,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false, "supports_vision": false,
"supports_prompt_caching": true "supports_prompt_caching": true
}, },
@ -268,7 +270,9 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_vision": false, "supports_vision": false,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true,
"supports_response_schema": true
}, },
"chatgpt-4o-latest": { "chatgpt-4o-latest": {
"max_tokens": 4096, "max_tokens": 4096,
@ -281,7 +285,8 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_vision": true, "supports_vision": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4o-2024-05-13": { "gpt-4o-2024-05-13": {
"max_tokens": 4096, "max_tokens": 4096,
@ -294,7 +299,8 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_vision": true, "supports_vision": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4o-2024-08-06": { "gpt-4o-2024-08-06": {
"max_tokens": 16384, "max_tokens": 16384,
@ -309,7 +315,8 @@
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_response_schema": true, "supports_response_schema": true,
"supports_vision": true, "supports_vision": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4o-2024-11-20": { "gpt-4o-2024-11-20": {
"max_tokens": 16384, "max_tokens": 16384,
@ -324,7 +331,8 @@
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_response_schema": true, "supports_response_schema": true,
"supports_vision": true, "supports_vision": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4o-realtime-preview-2024-10-01": { "gpt-4o-realtime-preview-2024-10-01": {
"max_tokens": 4096, "max_tokens": 4096,
@ -341,7 +349,8 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_audio_input": true, "supports_audio_input": true,
"supports_audio_output": true "supports_audio_output": true,
"supports_system_messages": true
}, },
"gpt-4o-realtime-preview": { "gpt-4o-realtime-preview": {
"max_tokens": 4096, "max_tokens": 4096,
@ -357,7 +366,8 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_audio_input": true, "supports_audio_input": true,
"supports_audio_output": true "supports_audio_output": true,
"supports_system_messages": true
}, },
"gpt-4o-realtime-preview-2024-12-17": { "gpt-4o-realtime-preview-2024-12-17": {
"max_tokens": 4096, "max_tokens": 4096,
@ -373,7 +383,8 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_audio_input": true, "supports_audio_input": true,
"supports_audio_output": true "supports_audio_output": true,
"supports_system_messages": true
}, },
"gpt-4o-mini-realtime-preview": { "gpt-4o-mini-realtime-preview": {
"max_tokens": 4096, "max_tokens": 4096,
@ -390,7 +401,8 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_audio_input": true, "supports_audio_input": true,
"supports_audio_output": true "supports_audio_output": true,
"supports_system_messages": true
}, },
"gpt-4o-mini-realtime-preview-2024-12-17": { "gpt-4o-mini-realtime-preview-2024-12-17": {
"max_tokens": 4096, "max_tokens": 4096,
@ -407,7 +419,8 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_audio_input": true, "supports_audio_input": true,
"supports_audio_output": true "supports_audio_output": true,
"supports_system_messages": true
}, },
"gpt-4-turbo-preview": { "gpt-4-turbo-preview": {
"max_tokens": 4096, "max_tokens": 4096,
@ -419,7 +432,8 @@
"mode": "chat", "mode": "chat",
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4-0314": { "gpt-4-0314": {
"max_tokens": 4096, "max_tokens": 4096,
@ -429,7 +443,8 @@
"output_cost_per_token": 0.00006, "output_cost_per_token": 0.00006,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4-0613": { "gpt-4-0613": {
"max_tokens": 4096, "max_tokens": 4096,
@ -440,7 +455,8 @@
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_function_calling": true, "supports_function_calling": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4-32k": { "gpt-4-32k": {
"max_tokens": 4096, "max_tokens": 4096,
@ -450,7 +466,8 @@
"output_cost_per_token": 0.00012, "output_cost_per_token": 0.00012,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4-32k-0314": { "gpt-4-32k-0314": {
"max_tokens": 4096, "max_tokens": 4096,
@ -460,7 +477,8 @@
"output_cost_per_token": 0.00012, "output_cost_per_token": 0.00012,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4-32k-0613": { "gpt-4-32k-0613": {
"max_tokens": 4096, "max_tokens": 4096,
@ -470,7 +488,8 @@
"output_cost_per_token": 0.00012, "output_cost_per_token": 0.00012,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4-turbo": { "gpt-4-turbo": {
"max_tokens": 4096, "max_tokens": 4096,
@ -483,7 +502,8 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_vision": true, "supports_vision": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4-turbo-2024-04-09": { "gpt-4-turbo-2024-04-09": {
"max_tokens": 4096, "max_tokens": 4096,
@ -496,7 +516,8 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_vision": true, "supports_vision": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4-1106-preview": { "gpt-4-1106-preview": {
"max_tokens": 4096, "max_tokens": 4096,
@ -508,7 +529,8 @@
"mode": "chat", "mode": "chat",
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4-0125-preview": { "gpt-4-0125-preview": {
"max_tokens": 4096, "max_tokens": 4096,
@ -520,7 +542,8 @@
"mode": "chat", "mode": "chat",
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4-vision-preview": { "gpt-4-vision-preview": {
"max_tokens": 4096, "max_tokens": 4096,
@ -531,7 +554,8 @@
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_vision": true, "supports_vision": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-4-1106-vision-preview": { "gpt-4-1106-vision-preview": {
"max_tokens": 4096, "max_tokens": 4096,
@ -542,7 +566,8 @@
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_vision": true, "supports_vision": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-3.5-turbo": { "gpt-3.5-turbo": {
"max_tokens": 4097, "max_tokens": 4097,
@ -553,7 +578,8 @@
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_function_calling": true, "supports_function_calling": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-3.5-turbo-0301": { "gpt-3.5-turbo-0301": {
"max_tokens": 4097, "max_tokens": 4097,
@ -563,7 +589,8 @@
"output_cost_per_token": 0.000002, "output_cost_per_token": 0.000002,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-3.5-turbo-0613": { "gpt-3.5-turbo-0613": {
"max_tokens": 4097, "max_tokens": 4097,
@ -574,7 +601,8 @@
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_function_calling": true, "supports_function_calling": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-3.5-turbo-1106": { "gpt-3.5-turbo-1106": {
"max_tokens": 16385, "max_tokens": 16385,
@ -586,7 +614,8 @@
"mode": "chat", "mode": "chat",
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-3.5-turbo-0125": { "gpt-3.5-turbo-0125": {
"max_tokens": 16385, "max_tokens": 16385,
@ -598,7 +627,8 @@
"mode": "chat", "mode": "chat",
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-3.5-turbo-16k": { "gpt-3.5-turbo-16k": {
"max_tokens": 16385, "max_tokens": 16385,
@ -608,7 +638,8 @@
"output_cost_per_token": 0.000004, "output_cost_per_token": 0.000004,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"gpt-3.5-turbo-16k-0613": { "gpt-3.5-turbo-16k-0613": {
"max_tokens": 16385, "max_tokens": 16385,
@ -618,7 +649,8 @@
"output_cost_per_token": 0.000004, "output_cost_per_token": 0.000004,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"ft:gpt-3.5-turbo": { "ft:gpt-3.5-turbo": {
"max_tokens": 4096, "max_tokens": 4096,
@ -627,7 +659,8 @@
"input_cost_per_token": 0.000003, "input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000006, "output_cost_per_token": 0.000006,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat",
"supports_system_messages": true
}, },
"ft:gpt-3.5-turbo-0125": { "ft:gpt-3.5-turbo-0125": {
"max_tokens": 4096, "max_tokens": 4096,
@ -636,7 +669,8 @@
"input_cost_per_token": 0.000003, "input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000006, "output_cost_per_token": 0.000006,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat",
"supports_system_messages": true
}, },
"ft:gpt-3.5-turbo-1106": { "ft:gpt-3.5-turbo-1106": {
"max_tokens": 4096, "max_tokens": 4096,
@ -645,7 +679,8 @@
"input_cost_per_token": 0.000003, "input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000006, "output_cost_per_token": 0.000006,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat",
"supports_system_messages": true
}, },
"ft:gpt-3.5-turbo-0613": { "ft:gpt-3.5-turbo-0613": {
"max_tokens": 4096, "max_tokens": 4096,
@ -654,7 +689,8 @@
"input_cost_per_token": 0.000003, "input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000006, "output_cost_per_token": 0.000006,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat",
"supports_system_messages": true
}, },
"ft:gpt-4-0613": { "ft:gpt-4-0613": {
"max_tokens": 4096, "max_tokens": 4096,
@ -665,7 +701,8 @@
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat", "mode": "chat",
"supports_function_calling": true, "supports_function_calling": true,
"source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing" "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing",
"supports_system_messages": true
}, },
"ft:gpt-4o-2024-08-06": { "ft:gpt-4o-2024-08-06": {
"max_tokens": 16384, "max_tokens": 16384,
@ -678,7 +715,8 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_response_schema": true, "supports_response_schema": true,
"supports_vision": true "supports_vision": true,
"supports_system_messages": true
}, },
"ft:gpt-4o-2024-11-20": { "ft:gpt-4o-2024-11-20": {
"max_tokens": 16384, "max_tokens": 16384,
@ -693,7 +731,8 @@
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_response_schema": true, "supports_response_schema": true,
"supports_vision": true, "supports_vision": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"ft:gpt-4o-mini-2024-07-18": { "ft:gpt-4o-mini-2024-07-18": {
"max_tokens": 16384, "max_tokens": 16384,
@ -708,7 +747,8 @@
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_response_schema": true, "supports_response_schema": true,
"supports_vision": true, "supports_vision": true,
"supports_prompt_caching": true "supports_prompt_caching": true,
"supports_system_messages": true
}, },
"ft:davinci-002": { "ft:davinci-002": {
"max_tokens": 16384, "max_tokens": 16384,
@ -3166,6 +3206,42 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_vision": true, "supports_vision": true,
"supports_response_schema": true, "supports_response_schema": true,
"supports_audio_output": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
},
"gemini/gemini-2.0-flash-exp": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0,
"input_cost_per_video_per_second": 0,
"input_cost_per_audio_per_second": 0,
"input_cost_per_token": 0,
"input_cost_per_character": 0,
"input_cost_per_token_above_128k_tokens": 0,
"input_cost_per_character_above_128k_tokens": 0,
"input_cost_per_image_above_128k_tokens": 0,
"input_cost_per_video_per_second_above_128k_tokens": 0,
"input_cost_per_audio_per_second_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_character": 0,
"output_cost_per_token_above_128k_tokens": 0,
"output_cost_per_character_above_128k_tokens": 0,
"litellm_provider": "gemini",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"tpm": 4000000,
"rpm": 10,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash" "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
}, },
"vertex_ai/claude-3-sonnet": { "vertex_ai/claude-3-sonnet": {

View file

@ -17,14 +17,19 @@ import litellm
from litellm import Choices, Message, ModelResponse from litellm import Choices, Message, ModelResponse
@pytest.mark.parametrize("model", ["o1-preview", "o1-mini", "o1"])
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_o1_handle_system_role(): async def test_o1_handle_system_role(model):
""" """
Tests that: Tests that:
- max_tokens is translated to 'max_completion_tokens' - max_tokens is translated to 'max_completion_tokens'
- role 'system' is translated to 'user' - role 'system' is translated to 'user'
""" """
from openai import AsyncOpenAI from openai import AsyncOpenAI
from litellm.utils import supports_system_messages
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
litellm.set_verbose = True litellm.set_verbose = True
@ -35,9 +40,9 @@ async def test_o1_handle_system_role():
) as mock_client: ) as mock_client:
try: try:
await litellm.acompletion( await litellm.acompletion(
model="o1-preview", model=model,
max_tokens=10, max_tokens=10,
messages=[{"role": "system", "content": "Hello!"}], messages=[{"role": "system", "content": "Be a good bot!"}],
client=client, client=client,
) )
except Exception as e: except Exception as e:
@ -48,9 +53,73 @@ async def test_o1_handle_system_role():
print("request_body: ", request_body) print("request_body: ", request_body)
assert request_body["model"] == "o1-preview" assert request_body["model"] == model
assert request_body["max_completion_tokens"] == 10 assert request_body["max_completion_tokens"] == 10
assert request_body["messages"] == [{"role": "user", "content": "Hello!"}] if supports_system_messages(model, "openai"):
assert request_body["messages"] == [
{"role": "system", "content": "Be a good bot!"}
]
else:
assert request_body["messages"] == [
{"role": "user", "content": "Be a good bot!"}
]
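Outside the mocked client, the same translation is what a caller sees: `max_tokens` becomes `max_completion_tokens`, and the system message is either passed through or rewritten to a user message depending on the model's `supports_system_messages` flag. A hedged end-to-end sketch (needs a real `OPENAI_API_KEY`; model names as in the parametrization above):

```python
# Sketch only: litellm applies the o1 transformations exercised in the test
# above, so the caller can send the same payload to o1 and o1-mini.
import litellm

for model in ["o1", "o1-mini"]:
    resp = litellm.completion(
        model=model,
        max_tokens=10,
        messages=[
            {"role": "system", "content": "Be a good bot!"},
            {"role": "user", "content": "Hello!"},
        ],
    )
    print(model, resp.choices[0].message.content)
```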
@pytest.mark.parametrize(
"model, expected_tool_calling_support",
[("o1-preview", False), ("o1-mini", False), ("o1", True)],
)
@pytest.mark.asyncio
async def test_o1_handle_tool_calling_optional_params(
model, expected_tool_calling_support
):
"""
Tests that:
- max_tokens is translated to 'max_completion_tokens'
- role 'system' is translated to 'user'
"""
from openai import AsyncOpenAI
from litellm.utils import ProviderConfigManager
from litellm.types.utils import LlmProviders
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
config = ProviderConfigManager.get_provider_chat_config(
model=model, provider=LlmProviders.OPENAI
)
supported_params = config.get_supported_openai_params(model=model)
assert expected_tool_calling_support == ("tools" in supported_params)
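Equivalently, the flags this test relies on are visible through the public capability helper; with the model map from this change, tool calling is advertised for `o1` only:

```python
# Sketch: mirrors the parametrization above using the public helper.
from litellm.utils import supports_function_calling

for model in ["o1-preview", "o1-mini", "o1"]:
    print(model, supports_function_calling(model, "openai"))
# o1-preview False, o1-mini False, o1 True
```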
# @pytest.mark.parametrize(
# "model",
# ["o1"], # "o1-preview", "o1-mini",
# )
# @pytest.mark.asyncio
# async def test_o1_handle_streaming_e2e(model):
# """
# Tests that:
# - max_tokens is translated to 'max_completion_tokens'
# - role 'system' is translated to 'user'
# """
# from openai import AsyncOpenAI
# from litellm.utils import ProviderConfigManager
# from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
# from litellm.types.utils import LlmProviders
# resp = litellm.completion(
# model=model,
# messages=[{"role": "user", "content": "Hello!"}],
# stream=True,
# )
# assert isinstance(resp, CustomStreamWrapper)
# for chunk in resp:
# print("chunk: ", chunk)
# assert True
@pytest.mark.asyncio @pytest.mark.asyncio

View file

@ -2072,6 +2072,7 @@ def test_openai_chat_completion_complete_response_call():
"azure/chatgpt-v-2", "azure/chatgpt-v-2",
"claude-3-haiku-20240307", "claude-3-haiku-20240307",
"o1-preview", "o1-preview",
"o1",
"azure/fake-o1-mini", "azure/fake-o1-mini",
], ],
) )