fix(health.md): add rerank model health check information (#7295)

* fix(health.md): add rerank model health check information

* build(model_prices_and_context_window.json): add gemini 2.0 for google ai studio - pricing + commercial rate limits

* build(model_prices_and_context_window.json): set supports_audio_output = true for gemini-2.0

* docs(team_model_add.md): clarify allowing teams to add models is an enterprise feature

* fix(o1_transformation.py): add support for 'n', 'response_format' and 'stop' params for o1 and 'stream_options' param for o1-mini

* build(model_prices_and_context_window.json): add 'supports_system_message' to supporting openai models

needed as the o1-preview and o1-mini models don't support the 'system' message role

* fix(o1_transformation.py): translate system message based on if o1 model supports it

* fix(o1_transformation.py): return 'stream' param support if o1-mini/o1-preview

o1 currently doesn't support streaming, but the other model versions do

Fixes https://github.com/BerriAI/litellm/issues/7292

* fix(o1_transformation.py): return tool calling/response_format in supported params if model map says so

Fixes https://github.com/BerriAI/litellm/issues/7292

* fix: fix linting errors

* fix: update '_transform_messages'

* fix(o1_transformation.py): fix provider passed for supported param checks

* test(base_llm_unit_tests.py): skip test if api takes >5s to respond

* fix(utils.py): return false in '_supports_factory' if the value can't be found

* fix(o1_transformation.py): always return stream + stream_options as supported params + handle stream options being passed in for azure o1

* feat(openai.py): support stream faking natively in openai handler

Allows streaming to be faked for just the "o1" model, while keeping native streaming for o1-mini and o1-preview

Fixes https://github.com/BerriAI/litellm/issues/7292

* fix(openai.py): use inference param instead of original optional param
Krish Dholakia 2024-12-18 19:18:10 -08:00 committed by GitHub
parent e95820367f
commit 1a4910f6c0
34 changed files with 800 additions and 515 deletions

View file

@ -121,6 +121,20 @@ model_list:
mode: audio_speech
```
### Rerank Models
To run rerank health checks, specify the mode as "rerank" in your config for the relevant model.
```yaml
model_list:
- model_name: rerank-english-v3.0
litellm_params:
model: cohere/rerank-english-v3.0
api_key: os.environ/COHERE_API_KEY
model_info:
mode: rerank
```
### Batch Models (Azure Only)
For Azure models deployed as 'batch' models, set `mode: batch`.
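
As a quick way to verify the checks, here is a minimal sketch that calls the proxy's `/health` endpoint; it assumes a locally running proxy on port 4000 started with a config like the one above and a master key of `sk-1234`. Rerank and batch models appear in the same healthy/unhealthy lists as other modes.

```python
# Minimal sketch: query the proxy's /health endpoint to see the configured checks run.
# Assumptions: proxy at http://0.0.0.0:4000, master key "sk-1234".
import requests

resp = requests.get(
    "http://0.0.0.0:4000/health",
    headers={"Authorization": "Bearer sk-1234"},
)
print(resp.json())  # e.g. {"healthy_endpoints": [...], "unhealthy_endpoints": [...], ...}
```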

View file

@ -1,4 +1,13 @@
# Allow Teams to Add Models
# ✨ Allow Teams to Add Models
:::info
This is an Enterprise feature.
[Enterprise Pricing](https://www.litellm.ai/#pricing)
[Contact us here to get a free trial](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
:::
Allow a team to add their own models/keys for a project - so any OpenAI call they make uses their own OpenAI key.

View file

@ -3144,7 +3144,9 @@ def prompt_factory(
else:
return gemini_text_image_pt(messages=messages)
elif custom_llm_provider == "mistral":
return litellm.MistralConfig()._transform_messages(messages=messages)
return litellm.MistralConfig()._transform_messages(
messages=messages, model=model
)
elif custom_llm_provider == "bedrock":
if "amazon.titan-text" in model:
return amazon_titan_pt(messages=messages)

View file

@ -260,12 +260,6 @@ class AnthropicTextConfig(BaseConfig):
return str(prompt)
def _transform_messages(
self, messages: List[AllMessageValues]
) -> List[AllMessageValues]:
"Not required"
raise NotImplementedError
def get_model_response_iterator(
self,
streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],

View file

@ -57,6 +57,7 @@ class AzureOpenAIO1ChatCompletion(AzureChatCompletion):
client=None,
):
stream: Optional[bool] = optional_params.pop("stream", False)
stream_options: Optional[dict] = optional_params.pop("stream_options", None)
response = super().completion(
model,
messages,
@ -90,6 +91,7 @@ class AzureOpenAIO1ChatCompletion(AzureChatCompletion):
model=model,
custom_llm_provider="openai",
logging_obj=logging_obj,
stream_options=stream_options,
)
return streaming_response
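
A usage sketch of the behaviour this hunk enables, assuming an Azure deployment named `o1-mini` and the usual `AZURE_API_KEY` / `AZURE_API_BASE` / `AZURE_API_VERSION` environment variables: `stream_options` is now popped from `optional_params` and threaded through to the stream wrapper instead of being dropped.

```python
# Sketch only: assumes an Azure OpenAI deployment called "o1-mini" plus AZURE_* env vars.
import litellm

response = litellm.completion(
    model="azure/o1-mini",
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
    stream_options={"include_usage": True},  # forwarded to the streaming wrapper per this change
)
for chunk in response:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
```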

View file

@ -2,11 +2,11 @@ from typing import List, Optional, Tuple
import litellm
from litellm._logging import verbose_logger
from litellm.llms.openai.openai import OpenAIConfig
from litellm.litellm_core_utils.prompt_templates.common_utils import (
_audio_or_image_in_message_content,
convert_content_list_to_str,
)
from litellm.llms.openai.openai import OpenAIConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import ProviderField
@ -33,6 +33,7 @@ class AzureAIStudioConfig(OpenAIConfig):
def _transform_messages(
self,
messages: List[AllMessageValues],
model: str,
) -> List:
"""
- Azure AI Studio doesn't support content as a list. This handles:

View file

@ -82,6 +82,14 @@ class BaseConfig(ABC):
and v is not None
}
def should_fake_stream(
self, model: str, custom_llm_provider: Optional[str] = None
) -> bool:
"""
Returns True if the model/provider should fake stream
"""
return False
@abstractmethod
def get_supported_openai_params(self, model: str) -> list:
pass
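
To illustrate the intent of the new hook, here is a minimal stand-in (not litellm's real class hierarchy): the base config defaults to native streaming, and a provider config can opt specific models into faked streaming.

```python
# Minimal stand-in showing the should_fake_stream() hook; the rule below is hypothetical
# and simply mirrors the o1 behaviour described in this PR.
from typing import Optional


class SketchBaseConfig:
    def should_fake_stream(self, model: str, custom_llm_provider: Optional[str] = None) -> bool:
        return False  # default: stream natively


class SketchO1Config(SketchBaseConfig):
    def should_fake_stream(self, model: str, custom_llm_provider: Optional[str] = None) -> bool:
        # only o1-mini / o1-preview stream natively; everything else in the family is faked
        return not any(m in model for m in ("o1-mini", "o1-preview"))


print(SketchO1Config().should_fake_stream("o1"))       # True  -> handler wraps the full response
print(SketchO1Config().should_fake_stream("o1-mini"))  # False -> handler streams natively
```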

View file

@ -131,11 +131,6 @@ class ClarifaiConfig(BaseConfig):
headers["Authorization"] = f"Bearer {api_key}"
return headers
def _transform_messages(
self, messages: List[AllMessageValues]
) -> List[AllMessageValues]:
raise NotImplementedError
def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
) -> BaseLLMException:

View file

@ -158,11 +158,6 @@ class CloudflareChatConfig(BaseConfig):
message=error_message,
)
def _transform_messages(
self, messages: List[AllMessageValues]
) -> List[AllMessageValues]:
raise NotImplementedError
def get_model_response_iterator(
self,
streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],

View file

@ -365,8 +365,3 @@ class CohereChatConfig(BaseConfig):
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
) -> BaseLLMException:
return CohereError(status_code=status_code, message=error_message)
def _transform_messages(
self, messages: List[AllMessageValues]
) -> List[AllMessageValues]:
raise NotImplementedError

View file

@ -121,12 +121,6 @@ class CohereTextConfig(BaseConfig):
api_key=api_key,
)
def _transform_messages(
self,
messages: List[AllMessageValues],
) -> List[AllMessageValues]:
raise NotImplementedError
def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
) -> BaseLLMException:

View file

@ -2,11 +2,12 @@
Handles the chat completion request for Databricks
"""
from typing import Any, Callable, Literal, Optional, Tuple, Union
from typing import Any, Callable, List, Literal, Optional, Tuple, Union, cast
from httpx._config import Timeout
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import CustomStreamingDecoder
from litellm.utils import ModelResponse
@ -44,7 +45,9 @@ class DatabricksChatCompletion(OpenAILikeChatHandler, DatabricksBase):
streaming_decoder: Optional[CustomStreamingDecoder] = None,
fake_stream: bool = False,
):
messages = DatabricksConfig()._transform_messages(messages) # type: ignore
messages = DatabricksConfig()._transform_messages(
messages=cast(List[AllMessageValues], messages), model=model
)
api_base, headers = self.databricks_validate_environment(
api_base=api_base,
api_key=api_key,

View file

@ -7,14 +7,14 @@ from typing import List, Optional, Union
from pydantic import BaseModel
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import ProviderField
from ...openai_like.chat.transformation import OpenAILikeChatConfig
from litellm.litellm_core_utils.prompt_templates.common_utils import (
handle_messages_with_content_list_to_str_conversion,
strip_name_from_messages,
)
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import ProviderField
from ...openai_like.chat.transformation import OpenAILikeChatConfig
class DatabricksConfig(OpenAILikeChatConfig):
@ -86,7 +86,7 @@ class DatabricksConfig(OpenAILikeChatConfig):
return False
def _transform_messages(
self, messages: List[AllMessageValues]
self, messages: List[AllMessageValues], model: str
) -> List[AllMessageValues]:
"""
Databricks does not support:
@ -102,4 +102,4 @@ class DatabricksConfig(OpenAILikeChatConfig):
new_messages.append(_message)
new_messages = handle_messages_with_content_list_to_str_conversion(new_messages)
new_messages = strip_name_from_messages(new_messages)
return super()._transform_messages(new_messages)
return super()._transform_messages(messages=new_messages, model=model)
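
For context, the normalization these `_transform_messages` overrides delegate to (`handle_messages_with_content_list_to_str_conversion`) collapses list-style content into a plain string. The snippet below is an illustrative re-implementation, not litellm's actual helper.

```python
# Illustrative only - a simplified version of the "content list -> string" normalization.
def content_list_to_str(message: dict) -> dict:
    content = message.get("content")
    if isinstance(content, list):
        text = "".join(part.get("text", "") for part in content if isinstance(part, dict))
        return {**message, "content": text}
    return message


print(content_list_to_str(
    {"role": "user", "content": [{"type": "text", "text": "Hello "}, {"type": "text", "text": "Databricks"}]}
))
# -> {'role': 'user', 'content': 'Hello Databricks'}
```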

View file

@ -8,26 +8,26 @@ from typing import List, Optional, Tuple, Union
from pydantic import BaseModel
import litellm
from litellm.litellm_core_utils.prompt_templates.common_utils import (
handle_messages_with_content_list_to_str_conversion,
)
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage
from ....utils import _remove_additional_properties, _remove_strict_from_schema
from ...openai.chat.gpt_transformation import OpenAIGPTConfig
from litellm.litellm_core_utils.prompt_templates.common_utils import (
handle_messages_with_content_list_to_str_conversion,
)
class DeepSeekChatConfig(OpenAIGPTConfig):
def _transform_messages(
self, messages: List[AllMessageValues]
self, messages: List[AllMessageValues], model: str
) -> List[AllMessageValues]:
"""
DeepSeek does not support content in list format.
"""
messages = handle_messages_with_content_list_to_str_conversion(messages)
return super()._transform_messages(messages)
return super()._transform_messages(messages=messages, model=model)
def _get_openai_compatible_provider_info(
self, api_base: Optional[str], api_key: Optional[str]

View file

@ -2,11 +2,12 @@
Handles the chat completion request for groq
"""
from typing import Any, Callable, Optional, Union
from typing import Any, Callable, List, Optional, Union, cast
from httpx._config import Timeout
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import CustomStreamingDecoder
from litellm.utils import ModelResponse
@ -42,7 +43,9 @@ class GroqChatCompletion(OpenAILikeChatHandler):
streaming_decoder: Optional[CustomStreamingDecoder] = None,
fake_stream: bool = False,
):
messages = GroqChatConfig()._transform_messages(messages) # type: ignore
messages = GroqChatConfig()._transform_messages(
messages=cast(List[AllMessageValues], messages), model=model
)
if optional_params.get("stream") is True:
fake_stream = GroqChatConfig()._should_fake_stream(optional_params)

View file

@ -61,7 +61,7 @@ class GroqChatConfig(OpenAIGPTConfig):
def get_config(cls):
return super().get_config()
def _transform_messages(self, messages: List[AllMessageValues]) -> List:
def _transform_messages(self, messages: List[AllMessageValues], model: str) -> List:
for idx, message in enumerate(messages):
"""
1. Don't pass 'null' function_call assistant message to groq - https://github.com/BerriAI/litellm/issues/5839

View file

@ -369,12 +369,6 @@ class HuggingfaceChatConfig(BaseConfig):
headers = {**headers, **default_headers}
return headers
def _transform_messages(
self,
messages: List[AllMessageValues],
) -> List[AllMessageValues]:
return messages
def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
) -> BaseLLMException:

View file

@ -9,11 +9,11 @@ Docs - https://docs.mistral.ai/api/
import types
from typing import List, Literal, Optional, Tuple, Union
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
from litellm.litellm_core_utils.prompt_templates.common_utils import (
handle_messages_with_content_list_to_str_conversion,
strip_none_values_from_message,
)
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues
@ -148,7 +148,7 @@ class MistralConfig(OpenAIGPTConfig):
return api_base, dynamic_api_key
def _transform_messages(
self, messages: List[AllMessageValues]
self, messages: List[AllMessageValues], model: str
) -> List[AllMessageValues]:
"""
- handles scenario where content is list and not string

View file

@ -23,6 +23,7 @@ from litellm.types.llms.openai import (
from litellm.types.utils import (
GenericStreamingChunk,
ModelInfo,
ModelInfoBase,
ModelResponse,
ProviderField,
StreamingChoices,
@ -198,7 +199,7 @@ class OllamaConfig(BaseConfig):
return v
return None
def get_model_info(self, model: str) -> ModelInfo:
def get_model_info(self, model: str) -> ModelInfoBase:
"""
curl http://localhost:11434/api/show -d '{
"name": "mistral"
@ -222,11 +223,10 @@ class OllamaConfig(BaseConfig):
_max_tokens: Optional[int] = self._get_max_tokens(model_info)
return ModelInfo(
return ModelInfoBase(
key=model,
litellm_provider="ollama",
mode="chat",
supported_openai_params=self.get_supported_openai_params(model=model),
supports_function_calling=self._supports_function_calling(model_info),
input_cost_per_token=0.0,
output_cost_per_token=0.0,
@ -235,11 +235,6 @@ class OllamaConfig(BaseConfig):
max_output_tokens=_max_tokens,
)
def _transform_messages(
self, messages: List[AllMessageValues]
) -> List[AllMessageValues]:
return messages
def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, Headers]
) -> BaseLLMException:
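
A small usage sketch of the changed return type (assumes a local Ollama server on the default port with the `mistral` model pulled): `OllamaConfig.get_model_info()` now returns a `ModelInfoBase`, i.e. without the `supported_openai_params` field that the top-level `get_model_info()` adds.

```python
# Sketch: requires a running Ollama server with the "mistral" model available locally.
import litellm

info = litellm.OllamaConfig().get_model_info("mistral")
print(info["litellm_provider"], info["mode"], info.get("supports_function_calling"))
```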

View file

@ -23,11 +23,6 @@ else:
class OobaboogaConfig(OpenAIGPTConfig):
def _transform_messages(
self, messages: List[AllMessageValues]
) -> List[AllMessageValues]:
return messages
def get_error_class(
self,
error_message: str,

View file

@ -164,7 +164,7 @@ class OpenAIGPTConfig(BaseConfig):
)
def _transform_messages(
self, messages: List[AllMessageValues]
self, messages: List[AllMessageValues], model: str
) -> List[AllMessageValues]:
return messages

View file

@ -15,7 +15,14 @@ import types
from typing import Any, List, Optional, Union
import litellm
from litellm import verbose_logger
from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage
from litellm.utils import (
supports_function_calling,
supports_response_schema,
supports_system_messages,
)
from .gpt_transformation import OpenAIGPTConfig
@ -29,6 +36,15 @@ class OpenAIO1Config(OpenAIGPTConfig):
def get_config(cls):
return super().get_config()
def should_fake_stream(
self, model: str, custom_llm_provider: Optional[str] = None
) -> bool:
supported_stream_models = ["o1-mini", "o1-preview"]
for supported_model in supported_stream_models:
if supported_model in model:
return False
return True
def get_supported_openai_params(self, model: str) -> list:
"""
Get the supported OpenAI params for the given model
@ -38,21 +54,37 @@ class OpenAIO1Config(OpenAIGPTConfig):
all_openai_params = super().get_supported_openai_params(model=model)
non_supported_params = [
"logprobs",
"tools",
"tool_choice",
"parallel_tool_calls",
"function_call",
"functions",
"top_p",
"n",
"presence_penalty",
"frequency_penalty",
"top_logprobs",
"response_format",
"stop",
"stream_options",
]
try:
model, custom_llm_provider, api_base, api_key = get_llm_provider(
model=model
)
except Exception:
verbose_logger.debug(
f"Unable to infer model provider for model={model}, defaulting to openai for o1 supported param check"
)
custom_llm_provider = "openai"
_supports_function_calling = supports_function_calling(
model, custom_llm_provider
)
_supports_response_schema = supports_response_schema(model, custom_llm_provider)
if not _supports_function_calling:
non_supported_params.append("tools")
non_supported_params.append("tool_choice")
non_supported_params.append("parallel_tool_calls")
non_supported_params.append("function_call")
non_supported_params.append("functions")
if not _supports_response_schema:
non_supported_params.append("response_format")
return [
param for param in all_openai_params if param not in non_supported_params
]
@ -95,16 +127,16 @@ class OpenAIO1Config(OpenAIGPTConfig):
return False
def _transform_messages(
self, messages: List[AllMessageValues]
self, messages: List[AllMessageValues], model: str
) -> List[AllMessageValues]:
"""
Handles limitations of O-1 model family.
- modalities: image => drop param (if user opts in to dropping param)
- role: system ==> translate to role 'user'
"""
_supports_system_messages = supports_system_messages(model, "openai")
for i, message in enumerate(messages):
if message["role"] == "system":
if message["role"] == "system" and not _supports_system_messages:
new_message = ChatCompletionUserMessage(
content=message["content"], role="user"
)
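
A quick way to observe the effect of this map-driven filtering; output depends on the locally loaded model-cost map, and the values in the comment are what the updated map implies.

```python
# Sketch: inspect which OpenAI params litellm reports for the o1 family after this change.
import litellm

for m in ("o1", "o1-mini"):
    params = litellm.get_supported_openai_params(model=m, custom_llm_provider="openai") or []
    print(m, "tools" in params, "response_format" in params)
# expected per the updated map: o1 -> True True, o1-mini -> False False
```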

View file

@ -33,6 +33,7 @@ from litellm.litellm_core_utils.prompt_templates.factory import (
prompt_factory,
)
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
from litellm.llms.bedrock.chat.invoke_handler import MockResponseIterator
from litellm.llms.custom_httpx.http_handler import _DEFAULT_TTL_FOR_HTTPX_CLIENTS
from litellm.secret_managers.main import get_secret_str
from litellm.types.utils import (
@ -198,7 +199,7 @@ class OpenAIConfig(BaseConfig):
return optional_params
def _transform_messages(
self, messages: List[AllMessageValues]
self, messages: List[AllMessageValues], model: str
) -> List[AllMessageValues]:
return messages
@ -410,6 +411,24 @@ class OpenAIChatCompletion(BaseLLM):
else:
raise e
def mock_streaming(
self,
response: ModelResponse,
logging_obj: LiteLLMLoggingObj,
model: str,
stream_options: Optional[dict] = None,
) -> CustomStreamWrapper:
completion_stream = MockResponseIterator(model_response=response)
streaming_response = CustomStreamWrapper(
completion_stream=completion_stream,
model=model,
custom_llm_provider="openai",
logging_obj=logging_obj,
stream_options=stream_options,
)
return streaming_response
def completion( # type: ignore # noqa: PLR0915
self,
model_response: ModelResponse,
@ -433,8 +452,21 @@ class OpenAIChatCompletion(BaseLLM):
):
super().completion()
try:
fake_stream: bool = False
if custom_llm_provider is not None and model is not None:
provider_config = ProviderConfigManager.get_provider_chat_config(
model=model, provider=LlmProviders(custom_llm_provider)
)
fake_stream = provider_config.should_fake_stream(
model=model, custom_llm_provider=custom_llm_provider
)
inference_params = optional_params.copy()
stream_options: Optional[dict] = inference_params.pop(
"stream_options", None
)
stream: Optional[bool] = inference_params.pop("stream", False)
if headers:
optional_params["extra_headers"] = headers
inference_params["extra_headers"] = headers
if model is None or messages is None:
raise OpenAIError(status_code=422, message="Missing model or messages")
@ -456,7 +488,9 @@ class OpenAIChatCompletion(BaseLLM):
if isinstance(provider_config, OpenAIGPTConfig) or isinstance(
provider_config, OpenAIConfig
):
messages = provider_config._transform_messages(messages)
messages = provider_config._transform_messages(
messages=messages, model=model
)
for _ in range(
2
@ -464,7 +498,7 @@ class OpenAIChatCompletion(BaseLLM):
data = OpenAIConfig().transform_request(
model=model,
messages=messages,
optional_params=optional_params,
optional_params=inference_params,
litellm_params=litellm_params,
headers=headers or {},
)
@ -472,7 +506,7 @@ class OpenAIChatCompletion(BaseLLM):
try:
max_retries = data.pop("max_retries", 2)
if acompletion is True:
if optional_params.get("stream", False):
if stream is True and fake_stream is False:
return self.async_streaming(
logging_obj=logging_obj,
headers=headers,
@ -485,11 +519,13 @@ class OpenAIChatCompletion(BaseLLM):
max_retries=max_retries,
organization=organization,
drop_params=drop_params,
stream_options=stream_options,
)
else:
return self.acompletion(
data=data,
headers=headers,
model=model,
logging_obj=logging_obj,
model_response=model_response,
api_base=api_base,
@ -499,8 +535,9 @@ class OpenAIChatCompletion(BaseLLM):
max_retries=max_retries,
organization=organization,
drop_params=drop_params,
fake_stream=fake_stream,
)
elif optional_params.get("stream", False):
elif stream is True and fake_stream is False:
return self.streaming(
logging_obj=logging_obj,
headers=headers,
@ -512,6 +549,7 @@ class OpenAIChatCompletion(BaseLLM):
client=client,
max_retries=max_retries,
organization=organization,
stream_options=stream_options,
)
else:
if not isinstance(max_retries, int):
@ -557,16 +595,26 @@ class OpenAIChatCompletion(BaseLLM):
original_response=stringified_response,
additional_args={"complete_input_dict": data},
)
return convert_to_model_response_object(
final_response_obj = convert_to_model_response_object(
response_object=stringified_response,
model_response_object=model_response,
_response_headers=headers,
)
if fake_stream is True:
return self.mock_streaming(
response=cast(ModelResponse, final_response_obj),
logging_obj=logging_obj,
model=model,
stream_options=stream_options,
)
return final_response_obj
except openai.UnprocessableEntityError as e:
## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
if litellm.drop_params is True or drop_params is True:
optional_params = drop_params_from_unprocessable_entity_error(
e, optional_params
inference_params = drop_params_from_unprocessable_entity_error(
e, inference_params
)
else:
raise e
@ -623,6 +671,7 @@ class OpenAIChatCompletion(BaseLLM):
async def acompletion(
self,
data: dict,
model: str,
model_response: ModelResponse,
logging_obj: LiteLLMLoggingObj,
timeout: Union[float, httpx.Timeout],
@ -633,6 +682,8 @@ class OpenAIChatCompletion(BaseLLM):
max_retries=None,
headers=None,
drop_params: Optional[bool] = None,
stream_options: Optional[dict] = None,
fake_stream: bool = False,
):
response = None
for _ in range(
@ -667,6 +718,7 @@ class OpenAIChatCompletion(BaseLLM):
openai_aclient=openai_aclient, data=data, timeout=timeout
)
stringified_response = response.model_dump()
logging_obj.post_call(
input=data["messages"],
api_key=api_key,
@ -674,12 +726,22 @@ class OpenAIChatCompletion(BaseLLM):
additional_args={"complete_input_dict": data},
)
logging_obj.model_call_details["response_headers"] = headers
return convert_to_model_response_object(
final_response_obj = convert_to_model_response_object(
response_object=stringified_response,
model_response_object=model_response,
hidden_params={"headers": headers},
_response_headers=headers,
)
if fake_stream is True:
return self.mock_streaming(
response=cast(ModelResponse, final_response_obj),
logging_obj=logging_obj,
model=model,
stream_options=stream_options,
)
return final_response_obj
except openai.UnprocessableEntityError as e:
## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
if litellm.drop_params is True or drop_params is True:
@ -710,7 +772,11 @@ class OpenAIChatCompletion(BaseLLM):
client=None,
max_retries=None,
headers=None,
stream_options: Optional[dict] = None,
):
data["stream"] = True
if stream_options is not None:
data["stream_options"] = stream_options
openai_client: OpenAI = self._get_openai_client( # type: ignore
is_async=False,
api_key=api_key,
@ -761,8 +827,12 @@ class OpenAIChatCompletion(BaseLLM):
max_retries=None,
headers=None,
drop_params: Optional[bool] = None,
stream_options: Optional[dict] = None,
):
response = None
data["stream"] = True
if stream_options is not None:
data["stream_options"] = stream_options
for _ in range(2):
try:
openai_aclient: AsyncOpenAI = self._get_openai_client( # type: ignore
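
End to end, this is what the new path enables. A usage sketch, assuming `OPENAI_API_KEY` is set: `stream=True` on the base "o1" model now returns a faked stream (the full response wrapped in `MockResponseIterator`), while o1-mini / o1-preview keep native streaming.

```python
# Usage sketch (assumes OPENAI_API_KEY): streaming "o1" is faked behind the same interface.
import litellm

response = litellm.completion(
    model="o1",
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
)
for chunk in response:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
```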

View file

@ -284,7 +284,9 @@ class OpenAILikeChatHandler(OpenAILikeBase):
if isinstance(provider_config, OpenAIGPTConfig) or isinstance(
provider_config, OpenAIConfig
):
messages = provider_config._transform_messages(messages)
messages = provider_config._transform_messages(
messages=messages, model=model
)
data = {
"model": model,

View file

@ -139,11 +139,6 @@ class PredibaseConfig(BaseConfig):
"Predibase transformation currently done in handler.py. Need to migrate to this file."
)
def _transform_messages(
self, messages: List[AllMessageValues]
) -> List[AllMessageValues]:
return messages
def transform_request(
self,
model: str,

View file

@ -130,11 +130,6 @@ class ReplicateConfig(BaseConfig):
return split_model[1]
return model
def _transform_messages(
self, messages: List[AllMessageValues]
) -> List[AllMessageValues]:
return messages
def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
) -> BaseLLMException:

View file

@ -57,12 +57,6 @@ class SagemakerConfig(BaseConfig):
def get_config(cls):
return super().get_config()
def _transform_messages(
self,
messages: List[AllMessageValues],
) -> List[AllMessageValues]:
return messages
def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, Headers]
) -> BaseLLMException:

View file

@ -240,12 +240,6 @@ class IBMWatsonXAIConfig(BaseConfig):
"us-south",
]
def _transform_messages(
self,
messages: List[AllMessageValues],
) -> List[AllMessageValues]:
return messages
def get_error_class(
self, error_message: str, status_code: int, headers: Union[Dict, httpx.Headers]
) -> BaseLLMException:

View file

@ -13,7 +13,8 @@
"supports_audio_input": true,
"supports_audio_output": true,
"supports_prompt_caching": true,
"supports_response_schema": true
"supports_response_schema": true,
"supports_system_messages": true
},
"sambanova/Meta-Llama-3.1-8B-Instruct": {
"max_tokens": 16000,
@ -94,7 +95,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4o": {
"max_tokens": 16384,
@ -109,7 +111,8 @@
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4o-audio-preview": {
"max_tokens": 16384,
@ -124,7 +127,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4o-audio-preview-2024-10-01": {
"max_tokens": 16384,
@ -139,7 +143,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4o-mini-audio-preview-2024-12-17": {
"max_tokens": 16384,
@ -154,7 +159,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4o-mini": {
"max_tokens": 16384,
@ -169,7 +175,8 @@
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4o-mini-2024-07-18": {
"max_tokens": 16384,
@ -184,7 +191,8 @@
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"o1": {
"max_tokens": 100000,
@ -198,7 +206,9 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true,
"supports_response_schema": true
},
"o1-mini": {
"max_tokens": 65536,
@ -209,8 +219,6 @@
"cache_read_input_token_cost": 0.0000015,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_prompt_caching": true
},
@ -223,8 +231,6 @@
"cache_read_input_token_cost": 0.0000015,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_prompt_caching": true
},
@ -237,8 +243,6 @@
"cache_read_input_token_cost": 0.0000075,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_prompt_caching": true
},
@ -251,8 +255,6 @@
"cache_read_input_token_cost": 0.0000075,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_prompt_caching": true
},
@ -268,7 +270,9 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true,
"supports_response_schema": true
},
"chatgpt-4o-latest": {
"max_tokens": 4096,
@ -281,7 +285,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4o-2024-05-13": {
"max_tokens": 4096,
@ -294,7 +299,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4o-2024-08-06": {
"max_tokens": 16384,
@ -309,7 +315,8 @@
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4o-2024-11-20": {
"max_tokens": 16384,
@ -324,7 +331,8 @@
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4o-realtime-preview-2024-10-01": {
"max_tokens": 4096,
@ -341,7 +349,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4o-realtime-preview": {
"max_tokens": 4096,
@ -357,7 +366,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4o-realtime-preview-2024-12-17": {
"max_tokens": 4096,
@ -373,7 +383,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4o-mini-realtime-preview": {
"max_tokens": 4096,
@ -390,7 +401,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4o-mini-realtime-preview-2024-12-17": {
"max_tokens": 4096,
@ -407,7 +419,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4-turbo-preview": {
"max_tokens": 4096,
@ -419,7 +432,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-0314": {
"max_tokens": 4096,
@ -429,7 +443,8 @@
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat",
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-0613": {
"max_tokens": 4096,
@ -440,7 +455,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-32k": {
"max_tokens": 4096,
@ -450,7 +466,8 @@
"output_cost_per_token": 0.00012,
"litellm_provider": "openai",
"mode": "chat",
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-32k-0314": {
"max_tokens": 4096,
@ -460,7 +477,8 @@
"output_cost_per_token": 0.00012,
"litellm_provider": "openai",
"mode": "chat",
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-32k-0613": {
"max_tokens": 4096,
@ -470,7 +488,8 @@
"output_cost_per_token": 0.00012,
"litellm_provider": "openai",
"mode": "chat",
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-turbo": {
"max_tokens": 4096,
@ -483,7 +502,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-turbo-2024-04-09": {
"max_tokens": 4096,
@ -496,7 +516,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-1106-preview": {
"max_tokens": 4096,
@ -508,7 +529,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-0125-preview": {
"max_tokens": 4096,
@ -520,7 +542,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-vision-preview": {
"max_tokens": 4096,
@ -531,7 +554,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-1106-vision-preview": {
"max_tokens": 4096,
@ -542,7 +566,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-3.5-turbo": {
"max_tokens": 4097,
@ -553,7 +578,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4097,
@ -563,7 +589,8 @@
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat",
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-3.5-turbo-0613": {
"max_tokens": 4097,
@ -574,7 +601,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-3.5-turbo-1106": {
"max_tokens": 16385,
@ -586,7 +614,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-3.5-turbo-0125": {
"max_tokens": 16385,
@ -598,7 +627,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16385,
@ -608,7 +638,8 @@
"output_cost_per_token": 0.000004,
"litellm_provider": "openai",
"mode": "chat",
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-3.5-turbo-16k-0613": {
"max_tokens": 16385,
@ -618,7 +649,8 @@
"output_cost_per_token": 0.000004,
"litellm_provider": "openai",
"mode": "chat",
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"ft:gpt-3.5-turbo": {
"max_tokens": 4096,
@ -627,7 +659,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_system_messages": true
},
"ft:gpt-3.5-turbo-0125": {
"max_tokens": 4096,
@ -636,7 +669,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_system_messages": true
},
"ft:gpt-3.5-turbo-1106": {
"max_tokens": 4096,
@ -645,7 +679,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_system_messages": true
},
"ft:gpt-3.5-turbo-0613": {
"max_tokens": 4096,
@ -654,7 +689,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_system_messages": true
},
"ft:gpt-4-0613": {
"max_tokens": 4096,
@ -665,7 +701,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing"
"source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing",
"supports_system_messages": true
},
"ft:gpt-4o-2024-08-06": {
"max_tokens": 16384,
@ -678,7 +715,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true
"supports_vision": true,
"supports_system_messages": true
},
"ft:gpt-4o-2024-11-20": {
"max_tokens": 16384,
@ -693,7 +731,8 @@
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"ft:gpt-4o-mini-2024-07-18": {
"max_tokens": 16384,
@ -708,7 +747,8 @@
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"ft:davinci-002": {
"max_tokens": 16384,
@ -3166,6 +3206,42 @@
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
},
"gemini/gemini-2.0-flash-exp": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0,
"input_cost_per_video_per_second": 0,
"input_cost_per_audio_per_second": 0,
"input_cost_per_token": 0,
"input_cost_per_character": 0,
"input_cost_per_token_above_128k_tokens": 0,
"input_cost_per_character_above_128k_tokens": 0,
"input_cost_per_image_above_128k_tokens": 0,
"input_cost_per_video_per_second_above_128k_tokens": 0,
"input_cost_per_audio_per_second_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_character": 0,
"output_cost_per_token_above_128k_tokens": 0,
"output_cost_per_character_above_128k_tokens": 0,
"litellm_provider": "gemini",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"tpm": 4000000,
"rpm": 10,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
},
"vertex_ai/claude-3-sonnet": {

View file

@ -74,11 +74,7 @@ class ProviderField(TypedDict):
field_value: str
class ModelInfo(TypedDict, total=False):
"""
Model info for a given model, this is information found in litellm.model_prices_and_context_window.json
"""
class ModelInfoBase(TypedDict, total=False):
key: Required[str] # the key in litellm.model_cost which is returned
max_tokens: Required[Optional[int]]
@ -119,7 +115,6 @@ class ModelInfo(TypedDict, total=False):
"completion", "embedding", "image_generation", "chat", "audio_transcription"
]
]
supported_openai_params: Required[Optional[List[str]]]
supports_system_messages: Optional[bool]
supports_response_schema: Optional[bool]
supports_vision: Optional[bool]
@ -133,6 +128,14 @@ class ModelInfo(TypedDict, total=False):
rpm: Optional[int]
class ModelInfo(ModelInfoBase, total=False):
"""
Model info for a given model, this is information found in litellm.model_prices_and_context_window.json
"""
supported_openai_params: Required[Optional[List[str]]]
class GenericStreamingChunk(TypedDict, total=False):
text: Required[str]
tool_use: Optional[ChatCompletionToolCallChunk]
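
The split is backwards compatible for callers: `get_model_info()` still returns the full `ModelInfo` (including `supported_openai_params`), while `ModelInfoBase` is what the new internal helper builds. A small usage sketch:

```python
# Sketch: the public API still exposes supported_openai_params on the returned dict.
import litellm

info = litellm.get_model_info("gpt-4o")
print(info["litellm_provider"], info["mode"])
print("tools" in (info.get("supported_openai_params") or []))
```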

View file

@ -132,6 +132,7 @@ from litellm.types.utils import (
LlmProviders,
Message,
ModelInfo,
ModelInfoBase,
ModelResponse,
ModelResponseStream,
ProviderField,
@ -1645,16 +1646,10 @@ def supports_system_messages(model: str, custom_llm_provider: Optional[str]) ->
Raises:
Exception: If the given model is not found in model_prices_and_context_window.json.
"""
try:
model_info = litellm.get_model_info(
model=model, custom_llm_provider=custom_llm_provider
)
if model_info.get("supports_system_messages", False) is True:
return True
return False
except Exception:
raise Exception(
f"Model not supports system messages. You passed model={model}, custom_llm_provider={custom_llm_provider}."
return _supports_factory(
model=model,
custom_llm_provider=custom_llm_provider,
key="supports_system_messages",
)
@ -1684,25 +1679,11 @@ def supports_response_schema(model: str, custom_llm_provider: Optional[str]) ->
if custom_llm_provider in PROVIDERS_GLOBALLY_SUPPORT_RESPONSE_SCHEMA:
return True
try:
## GET MODEL INFO
model_info = litellm.get_model_info(
model=model, custom_llm_provider=custom_llm_provider
)
if model_info.get("supports_response_schema", False) is True:
return True
except Exception:
## check if provider supports response schema globally
supported_params = get_supported_openai_params(
return _supports_factory(
model=model,
custom_llm_provider=custom_llm_provider,
request_type="chat_completion",
key="supports_response_schema",
)
if supported_params is not None and "response_schema" in supported_params:
return True
return False
def supports_function_calling(
@ -1721,22 +1702,10 @@ def supports_function_calling(
Raises:
Exception: If the given model is not found or there's an error in retrieval.
"""
try:
model, custom_llm_provider, _, _ = litellm.get_llm_provider(
model=model, custom_llm_provider=custom_llm_provider
)
## CHECK IF MODEL SUPPORTS FUNCTION CALLING ##
model_info = litellm.get_model_info(
model=model, custom_llm_provider=custom_llm_provider
)
if model_info.get("supports_function_calling", False) is True:
return True
return False
except Exception as e:
raise Exception(
f"Model not found or error in checking function calling support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}"
return _supports_factory(
model=model,
custom_llm_provider=custom_llm_provider,
key="supports_function_calling",
)
@ -1759,7 +1728,7 @@ def _supports_factory(model: str, custom_llm_provider: Optional[str], key: str)
model=model, custom_llm_provider=custom_llm_provider
)
model_info = litellm.get_model_info(
model_info = _get_model_info_helper(
model=model, custom_llm_provider=custom_llm_provider
)
@ -1767,9 +1736,10 @@ def _supports_factory(model: str, custom_llm_provider: Optional[str], key: str)
return True
return False
except Exception as e:
raise Exception(
verbose_logger.debug(
f"Model not found or error in checking {key} support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}"
)
return False
def supports_audio_input(model: str, custom_llm_provider: Optional[str] = None) -> bool:
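
With the refactor, the `supports_*` helpers share `_supports_factory`, which now logs at debug level and returns False for unmapped models instead of raising. A usage sketch (results depend on the local model map):

```python
# Sketch: the helpers no longer raise for unknown models - they return False.
from litellm.utils import supports_function_calling, supports_system_messages

print(supports_function_calling("gpt-4o"))                   # True per the model map
print(supports_system_messages("o1-mini", "openai"))         # False - o1-mini lacks the flag
print(supports_function_calling("some-unmapped-model-xyz"))  # False instead of an exception
```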
@ -4196,9 +4166,239 @@ def _get_potential_model_names(
)
def get_model_info( # noqa: PLR0915
def _get_max_position_embeddings(model_name: str) -> Optional[int]:
# Construct the URL for the config.json file
config_url = f"https://huggingface.co/{model_name}/raw/main/config.json"
try:
# Make the HTTP request to get the raw JSON file
response = litellm.module_level_client.get(config_url)
response.raise_for_status() # Raise an exception for bad responses (4xx or 5xx)
# Parse the JSON response
config_json = response.json()
# Extract and return the max_position_embeddings
max_position_embeddings = config_json.get("max_position_embeddings")
if max_position_embeddings is not None:
return max_position_embeddings
else:
return None
except Exception:
return None
def _get_model_info_helper( # noqa: PLR0915
model: str, custom_llm_provider: Optional[str] = None
) -> ModelInfo:
) -> ModelInfoBase:
"""
Helper for 'get_model_info'. Separated out to avoid infinite loop caused by returning 'supported_openai_param's
"""
try:
azure_llms = {**litellm.azure_llms, **litellm.azure_embedding_models}
if model in azure_llms:
model = azure_llms[model]
if custom_llm_provider is not None and custom_llm_provider == "vertex_ai_beta":
custom_llm_provider = "vertex_ai"
if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
if "meta/" + model in litellm.vertex_llama3_models:
model = "meta/" + model
elif model + "@latest" in litellm.vertex_mistral_models:
model = model + "@latest"
elif model + "@latest" in litellm.vertex_ai_ai21_models:
model = model + "@latest"
##########################
potential_model_names = _get_potential_model_names(
model=model, custom_llm_provider=custom_llm_provider
)
combined_model_name = potential_model_names["combined_model_name"]
stripped_model_name = potential_model_names["stripped_model_name"]
combined_stripped_model_name = potential_model_names[
"combined_stripped_model_name"
]
split_model = potential_model_names["split_model"]
custom_llm_provider = potential_model_names["custom_llm_provider"]
#########################
if custom_llm_provider == "huggingface":
max_tokens = _get_max_position_embeddings(model_name=model)
return ModelInfoBase(
key=model,
max_tokens=max_tokens, # type: ignore
max_input_tokens=None,
max_output_tokens=None,
input_cost_per_token=0,
output_cost_per_token=0,
litellm_provider="huggingface",
mode="chat",
supports_system_messages=None,
supports_response_schema=None,
supports_function_calling=None,
supports_assistant_prefill=None,
supports_prompt_caching=None,
supports_pdf_input=None,
)
elif custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
return litellm.OllamaConfig().get_model_info(model)
else:
"""
Check if: (in order of specificity)
1. 'custom_llm_provider/model' in litellm.model_cost. Checks "groq/llama3-8b-8192" if model="llama3-8b-8192" and custom_llm_provider="groq"
2. 'model' in litellm.model_cost. Checks "gemini-1.5-pro-002" in litellm.model_cost if model="gemini-1.5-pro-002" and custom_llm_provider=None
3. 'combined_stripped_model_name' in litellm.model_cost. Checks if 'gemini/gemini-1.5-flash' in model map, if 'gemini/gemini-1.5-flash-001' given.
4. 'stripped_model_name' in litellm.model_cost. Checks if 'ft:gpt-3.5-turbo' in model map, if 'ft:gpt-3.5-turbo:my-org:custom_suffix:id' given.
5. 'split_model' in litellm.model_cost. Checks "llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192"
"""
_model_info: Optional[Dict[str, Any]] = None
key: Optional[str] = None
if combined_model_name in litellm.model_cost:
key = combined_model_name
_model_info = _get_model_info_from_model_cost(key=key)
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and model in litellm.model_cost:
key = model
_model_info = _get_model_info_from_model_cost(key=key)
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if (
_model_info is None
and combined_stripped_model_name in litellm.model_cost
):
key = combined_stripped_model_name
_model_info = _get_model_info_from_model_cost(key=key)
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and stripped_model_name in litellm.model_cost:
key = stripped_model_name
_model_info = _get_model_info_from_model_cost(key=key)
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and split_model in litellm.model_cost:
key = split_model
_model_info = _get_model_info_from_model_cost(key=key)
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None or key is None:
raise ValueError(
"This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
)
## PROVIDER-SPECIFIC INFORMATION
if custom_llm_provider == "predibase":
_model_info["supports_response_schema"] = True
_input_cost_per_token: Optional[float] = _model_info.get(
"input_cost_per_token"
)
if _input_cost_per_token is None:
# default value to 0, be noisy about this
verbose_logger.debug(
"model={}, custom_llm_provider={} has no input_cost_per_token in model_cost_map. Defaulting to 0.".format(
model, custom_llm_provider
)
)
_input_cost_per_token = 0
_output_cost_per_token: Optional[float] = _model_info.get(
"output_cost_per_token"
)
if _output_cost_per_token is None:
# default value to 0, be noisy about this
verbose_logger.debug(
"model={}, custom_llm_provider={} has no output_cost_per_token in model_cost_map. Defaulting to 0.".format(
model, custom_llm_provider
)
)
_output_cost_per_token = 0
return ModelInfoBase(
key=key,
max_tokens=_model_info.get("max_tokens", None),
max_input_tokens=_model_info.get("max_input_tokens", None),
max_output_tokens=_model_info.get("max_output_tokens", None),
input_cost_per_token=_input_cost_per_token,
cache_creation_input_token_cost=_model_info.get(
"cache_creation_input_token_cost", None
),
cache_read_input_token_cost=_model_info.get(
"cache_read_input_token_cost", None
),
input_cost_per_character=_model_info.get(
"input_cost_per_character", None
),
input_cost_per_token_above_128k_tokens=_model_info.get(
"input_cost_per_token_above_128k_tokens", None
),
input_cost_per_query=_model_info.get("input_cost_per_query", None),
input_cost_per_second=_model_info.get("input_cost_per_second", None),
input_cost_per_audio_token=_model_info.get(
"input_cost_per_audio_token", None
),
output_cost_per_token=_output_cost_per_token,
output_cost_per_audio_token=_model_info.get(
"output_cost_per_audio_token", None
),
output_cost_per_character=_model_info.get(
"output_cost_per_character", None
),
output_cost_per_token_above_128k_tokens=_model_info.get(
"output_cost_per_token_above_128k_tokens", None
),
output_cost_per_character_above_128k_tokens=_model_info.get(
"output_cost_per_character_above_128k_tokens", None
),
output_cost_per_second=_model_info.get("output_cost_per_second", None),
output_cost_per_image=_model_info.get("output_cost_per_image", None),
output_vector_size=_model_info.get("output_vector_size", None),
litellm_provider=_model_info.get(
"litellm_provider", custom_llm_provider
),
mode=_model_info.get("mode"), # type: ignore
supports_system_messages=_model_info.get(
"supports_system_messages", None
),
supports_response_schema=_model_info.get(
"supports_response_schema", None
),
supports_vision=_model_info.get("supports_vision", False),
supports_function_calling=_model_info.get(
"supports_function_calling", False
),
supports_assistant_prefill=_model_info.get(
"supports_assistant_prefill", False
),
supports_prompt_caching=_model_info.get(
"supports_prompt_caching", False
),
supports_audio_input=_model_info.get("supports_audio_input", False),
supports_audio_output=_model_info.get("supports_audio_output", False),
supports_pdf_input=_model_info.get("supports_pdf_input", False),
tpm=_model_info.get("tpm", None),
rpm=_model_info.get("rpm", None),
)
except Exception as e:
if "OllamaError" in str(e):
raise e
raise Exception(
"This model isn't mapped yet. model={}, custom_llm_provider={}. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json.".format(
model, custom_llm_provider
)
)
def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
"""
Get a dict for the maximum tokens (context window), input_cost_per_token, output_cost_per_token for a given model.
@ -4265,241 +4465,20 @@ def get_model_info( # noqa: PLR0915
"supported_openai_params": ["temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty"]
}
"""
supported_openai_params: Union[List[str], None] = []
def _get_max_position_embeddings(model_name):
# Construct the URL for the config.json file
config_url = f"https://huggingface.co/{model_name}/raw/main/config.json"
try:
# Make the HTTP request to get the raw JSON file
response = litellm.module_level_client.get(config_url)
response.raise_for_status() # Raise an exception for bad responses (4xx or 5xx)
# Parse the JSON response
config_json = response.json()
# Extract and return the max_position_embeddings
max_position_embeddings = config_json.get("max_position_embeddings")
if max_position_embeddings is not None:
return max_position_embeddings
else:
return None
except Exception:
return None
try:
azure_llms = {**litellm.azure_llms, **litellm.azure_embedding_models}
if model in azure_llms:
model = azure_llms[model]
if custom_llm_provider is not None and custom_llm_provider == "vertex_ai_beta":
custom_llm_provider = "vertex_ai"
if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
if "meta/" + model in litellm.vertex_llama3_models:
model = "meta/" + model
elif model + "@latest" in litellm.vertex_mistral_models:
model = model + "@latest"
elif model + "@latest" in litellm.vertex_ai_ai21_models:
model = model + "@latest"
##########################
potential_model_names = _get_potential_model_names(
model=model, custom_llm_provider=custom_llm_provider
)
combined_model_name = potential_model_names["combined_model_name"]
stripped_model_name = potential_model_names["stripped_model_name"]
combined_stripped_model_name = potential_model_names[
"combined_stripped_model_name"
]
split_model = potential_model_names["split_model"]
custom_llm_provider = potential_model_names["custom_llm_provider"]
#########################
supported_openai_params = litellm.get_supported_openai_params(
model=model, custom_llm_provider=custom_llm_provider
)
if custom_llm_provider == "huggingface":
max_tokens = _get_max_position_embeddings(model_name=model)
return ModelInfo(
key=model,
max_tokens=max_tokens, # type: ignore
max_input_tokens=None,
max_output_tokens=None,
input_cost_per_token=0,
output_cost_per_token=0,
litellm_provider="huggingface",
mode="chat",
supported_openai_params=supported_openai_params,
supports_system_messages=None,
supports_response_schema=None,
supports_function_calling=None,
supports_assistant_prefill=None,
supports_prompt_caching=None,
supports_pdf_input=None,
)
elif custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
return litellm.OllamaConfig().get_model_info(model)
else:
"""
Check if: (in order of specificity)
1. 'custom_llm_provider/model' in litellm.model_cost. Checks "groq/llama3-8b-8192" if model="llama3-8b-8192" and custom_llm_provider="groq"
2. 'model' in litellm.model_cost. Checks "gemini-1.5-pro-002" in litellm.model_cost if model="gemini-1.5-pro-002" and custom_llm_provider=None
3. 'combined_stripped_model_name' in litellm.model_cost. Checks if 'gemini/gemini-1.5-flash' in model map, if 'gemini/gemini-1.5-flash-001' given.
4. 'stripped_model_name' in litellm.model_cost. Checks if 'ft:gpt-3.5-turbo' in model map, if 'ft:gpt-3.5-turbo:my-org:custom_suffix:id' given.
5. 'split_model' in litellm.model_cost. Checks "llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192"
"""
_model_info: Optional[Dict[str, Any]] = None
key: Optional[str] = None
if combined_model_name in litellm.model_cost:
key = combined_model_name
_model_info = _get_model_info_from_model_cost(key=key)
_model_info["supported_openai_params"] = supported_openai_params
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and model in litellm.model_cost:
key = model
_model_info = _get_model_info_from_model_cost(key=key)
_model_info["supported_openai_params"] = supported_openai_params
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if (
_model_info is None
and combined_stripped_model_name in litellm.model_cost
):
key = combined_stripped_model_name
_model_info = _get_model_info_from_model_cost(key=key)
_model_info["supported_openai_params"] = supported_openai_params
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and stripped_model_name in litellm.model_cost:
key = stripped_model_name
_model_info = _get_model_info_from_model_cost(key=key)
_model_info["supported_openai_params"] = supported_openai_params
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and split_model in litellm.model_cost:
key = split_model
_model_info = _get_model_info_from_model_cost(key=key)
_model_info["supported_openai_params"] = supported_openai_params
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None or key is None:
raise ValueError(
"This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
_model_info = _get_model_info_helper(
model=model,
custom_llm_provider=custom_llm_provider,
)
## PROVIDER-SPECIFIC INFORMATION
if custom_llm_provider == "predibase":
_model_info["supports_response_schema"] = True
returned_model_info = ModelInfo(
**_model_info, supported_openai_params=supported_openai_params
)
_input_cost_per_token: Optional[float] = _model_info.get(
"input_cost_per_token"
)
if _input_cost_per_token is None:
# default value to 0, be noisy about this
verbose_logger.debug(
"model={}, custom_llm_provider={} has no input_cost_per_token in model_cost_map. Defaulting to 0.".format(
model, custom_llm_provider
)
)
_input_cost_per_token = 0
_output_cost_per_token: Optional[float] = _model_info.get(
"output_cost_per_token"
)
if _output_cost_per_token is None:
# default value to 0, be noisy about this
verbose_logger.debug(
"model={}, custom_llm_provider={} has no output_cost_per_token in model_cost_map. Defaulting to 0.".format(
model, custom_llm_provider
)
)
_output_cost_per_token = 0
return ModelInfo(
key=key,
max_tokens=_model_info.get("max_tokens", None),
max_input_tokens=_model_info.get("max_input_tokens", None),
max_output_tokens=_model_info.get("max_output_tokens", None),
input_cost_per_token=_input_cost_per_token,
cache_creation_input_token_cost=_model_info.get(
"cache_creation_input_token_cost", None
),
cache_read_input_token_cost=_model_info.get(
"cache_read_input_token_cost", None
),
input_cost_per_character=_model_info.get(
"input_cost_per_character", None
),
input_cost_per_token_above_128k_tokens=_model_info.get(
"input_cost_per_token_above_128k_tokens", None
),
input_cost_per_query=_model_info.get("input_cost_per_query", None),
input_cost_per_second=_model_info.get("input_cost_per_second", None),
input_cost_per_audio_token=_model_info.get(
"input_cost_per_audio_token", None
),
output_cost_per_token=_output_cost_per_token,
output_cost_per_audio_token=_model_info.get(
"output_cost_per_audio_token", None
),
output_cost_per_character=_model_info.get(
"output_cost_per_character", None
),
output_cost_per_token_above_128k_tokens=_model_info.get(
"output_cost_per_token_above_128k_tokens", None
),
output_cost_per_character_above_128k_tokens=_model_info.get(
"output_cost_per_character_above_128k_tokens", None
),
output_cost_per_second=_model_info.get("output_cost_per_second", None),
output_cost_per_image=_model_info.get("output_cost_per_image", None),
output_vector_size=_model_info.get("output_vector_size", None),
litellm_provider=_model_info.get(
"litellm_provider", custom_llm_provider
),
mode=_model_info.get("mode"), # type: ignore
supported_openai_params=supported_openai_params,
supports_system_messages=_model_info.get(
"supports_system_messages", None
),
supports_response_schema=_model_info.get(
"supports_response_schema", None
),
supports_vision=_model_info.get("supports_vision", False),
supports_function_calling=_model_info.get(
"supports_function_calling", False
),
supports_assistant_prefill=_model_info.get(
"supports_assistant_prefill", False
),
supports_prompt_caching=_model_info.get(
"supports_prompt_caching", False
),
supports_audio_input=_model_info.get("supports_audio_input", False),
supports_audio_output=_model_info.get("supports_audio_output", False),
supports_pdf_input=_model_info.get("supports_pdf_input", False),
tpm=_model_info.get("tpm", None),
rpm=_model_info.get("rpm", None),
)
except Exception as e:
if "OllamaError" in str(e):
raise e
raise Exception(
"This model isn't mapped yet. model={}, custom_llm_provider={}. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json.".format(
model, custom_llm_provider
)
)
return returned_model_info
def json_schema_type(python_type_name: str):
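
For readers skimming the diff, the docstring above spells out the key-resolution order `get_model_info` tries against `litellm.model_cost`. A minimal sketch of that order (the `resolve_model_key` helper and its arguments are illustrative, not part of litellm; the real code additionally re-checks `_check_provider_match` on every hit and keeps searching when the provider doesn't match):

```python
from typing import Dict, Optional


def resolve_model_key(
    model_cost: Dict[str, dict],
    combined_model_name: str,           # e.g. "groq/llama3-8b-8192"
    model: str,                         # e.g. "gemini-1.5-pro-002"
    combined_stripped_model_name: str,  # e.g. "gemini/gemini-1.5-flash"
    stripped_model_name: str,           # e.g. "ft:gpt-3.5-turbo"
    split_model: str,                   # e.g. "llama3-8b-8192"
) -> Optional[str]:
    # Try the most specific candidate first, the least specific last.
    for candidate in (
        combined_model_name,
        model,
        combined_stripped_model_name,
        stripped_model_name,
        split_model,
    ):
        if candidate in model_cost:
            return candidate
    return None  # caller raises the "model isn't mapped yet" error
```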

View file

@ -13,7 +13,8 @@
"supports_audio_input": true,
"supports_audio_output": true,
"supports_prompt_caching": true,
"supports_response_schema": true
"supports_response_schema": true,
"supports_system_messages": true
},
"sambanova/Meta-Llama-3.1-8B-Instruct": {
"max_tokens": 16000,
@ -94,7 +95,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4o": {
"max_tokens": 16384,
@ -109,7 +111,8 @@
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4o-audio-preview": {
"max_tokens": 16384,
@ -124,7 +127,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4o-audio-preview-2024-10-01": {
"max_tokens": 16384,
@ -139,7 +143,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4o-mini-audio-preview-2024-12-17": {
"max_tokens": 16384,
@ -154,7 +159,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4o-mini": {
"max_tokens": 16384,
@ -169,7 +175,8 @@
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4o-mini-2024-07-18": {
"max_tokens": 16384,
@ -184,7 +191,8 @@
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"o1": {
"max_tokens": 100000,
@ -198,7 +206,9 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true,
"supports_response_schema": true
},
"o1-mini": {
"max_tokens": 65536,
@ -209,8 +219,6 @@
"cache_read_input_token_cost": 0.0000015,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_prompt_caching": true
},
@ -223,8 +231,6 @@
"cache_read_input_token_cost": 0.0000015,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_prompt_caching": true
},
@ -237,8 +243,6 @@
"cache_read_input_token_cost": 0.0000075,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_prompt_caching": true
},
@ -251,8 +255,6 @@
"cache_read_input_token_cost": 0.0000075,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_prompt_caching": true
},
@ -268,7 +270,9 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true,
"supports_response_schema": true
},
"chatgpt-4o-latest": {
"max_tokens": 4096,
@ -281,7 +285,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4o-2024-05-13": {
"max_tokens": 4096,
@ -294,7 +299,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4o-2024-08-06": {
"max_tokens": 16384,
@ -309,7 +315,8 @@
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4o-2024-11-20": {
"max_tokens": 16384,
@ -324,7 +331,8 @@
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4o-realtime-preview-2024-10-01": {
"max_tokens": 4096,
@ -341,7 +349,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4o-realtime-preview": {
"max_tokens": 4096,
@ -357,7 +366,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4o-realtime-preview-2024-12-17": {
"max_tokens": 4096,
@ -373,7 +383,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4o-mini-realtime-preview": {
"max_tokens": 4096,
@ -390,7 +401,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4o-mini-realtime-preview-2024-12-17": {
"max_tokens": 4096,
@ -407,7 +419,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true
"supports_audio_output": true,
"supports_system_messages": true
},
"gpt-4-turbo-preview": {
"max_tokens": 4096,
@ -419,7 +432,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-0314": {
"max_tokens": 4096,
@ -429,7 +443,8 @@
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat",
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-0613": {
"max_tokens": 4096,
@ -440,7 +455,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-32k": {
"max_tokens": 4096,
@ -450,7 +466,8 @@
"output_cost_per_token": 0.00012,
"litellm_provider": "openai",
"mode": "chat",
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-32k-0314": {
"max_tokens": 4096,
@ -460,7 +477,8 @@
"output_cost_per_token": 0.00012,
"litellm_provider": "openai",
"mode": "chat",
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-32k-0613": {
"max_tokens": 4096,
@ -470,7 +488,8 @@
"output_cost_per_token": 0.00012,
"litellm_provider": "openai",
"mode": "chat",
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-turbo": {
"max_tokens": 4096,
@ -483,7 +502,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-turbo-2024-04-09": {
"max_tokens": 4096,
@ -496,7 +516,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-1106-preview": {
"max_tokens": 4096,
@ -508,7 +529,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-0125-preview": {
"max_tokens": 4096,
@ -520,7 +542,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-vision-preview": {
"max_tokens": 4096,
@ -531,7 +554,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-4-1106-vision-preview": {
"max_tokens": 4096,
@ -542,7 +566,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-3.5-turbo": {
"max_tokens": 4097,
@ -553,7 +578,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4097,
@ -563,7 +589,8 @@
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat",
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-3.5-turbo-0613": {
"max_tokens": 4097,
@ -574,7 +601,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-3.5-turbo-1106": {
"max_tokens": 16385,
@ -586,7 +614,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-3.5-turbo-0125": {
"max_tokens": 16385,
@ -598,7 +627,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16385,
@ -608,7 +638,8 @@
"output_cost_per_token": 0.000004,
"litellm_provider": "openai",
"mode": "chat",
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"gpt-3.5-turbo-16k-0613": {
"max_tokens": 16385,
@ -618,7 +649,8 @@
"output_cost_per_token": 0.000004,
"litellm_provider": "openai",
"mode": "chat",
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"ft:gpt-3.5-turbo": {
"max_tokens": 4096,
@ -627,7 +659,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_system_messages": true
},
"ft:gpt-3.5-turbo-0125": {
"max_tokens": 4096,
@ -636,7 +669,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_system_messages": true
},
"ft:gpt-3.5-turbo-1106": {
"max_tokens": 4096,
@ -645,7 +679,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_system_messages": true
},
"ft:gpt-3.5-turbo-0613": {
"max_tokens": 4096,
@ -654,7 +689,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_system_messages": true
},
"ft:gpt-4-0613": {
"max_tokens": 4096,
@ -665,7 +701,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing"
"source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing",
"supports_system_messages": true
},
"ft:gpt-4o-2024-08-06": {
"max_tokens": 16384,
@ -678,7 +715,8 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true
"supports_vision": true,
"supports_system_messages": true
},
"ft:gpt-4o-2024-11-20": {
"max_tokens": 16384,
@ -693,7 +731,8 @@
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"ft:gpt-4o-mini-2024-07-18": {
"max_tokens": 16384,
@ -708,7 +747,8 @@
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_system_messages": true
},
"ft:davinci-002": {
"max_tokens": 16384,
@ -3166,6 +3206,42 @@
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
},
"gemini/gemini-2.0-flash-exp": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0,
"input_cost_per_video_per_second": 0,
"input_cost_per_audio_per_second": 0,
"input_cost_per_token": 0,
"input_cost_per_character": 0,
"input_cost_per_token_above_128k_tokens": 0,
"input_cost_per_character_above_128k_tokens": 0,
"input_cost_per_image_above_128k_tokens": 0,
"input_cost_per_video_per_second_above_128k_tokens": 0,
"input_cost_per_audio_per_second_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_character": 0,
"output_cost_per_token_above_128k_tokens": 0,
"output_cost_per_character_above_128k_tokens": 0,
"litellm_provider": "gemini",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"tpm": 4000000,
"rpm": 10,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
},
"vertex_ai/claude-3-sonnet": {

View file

@ -17,14 +17,19 @@ import litellm
from litellm import Choices, Message, ModelResponse
@pytest.mark.parametrize("model", ["o1-preview", "o1-mini", "o1"])
@pytest.mark.asyncio
async def test_o1_handle_system_role():
async def test_o1_handle_system_role(model):
"""
Tests that:
- max_tokens is translated to 'max_completion_tokens'
- role 'system' is translated to 'user' only when the model doesn't support system messages
"""
from openai import AsyncOpenAI
from litellm.utils import supports_system_messages
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
litellm.set_verbose = True
@ -35,9 +40,9 @@ async def test_o1_handle_system_role():
) as mock_client:
try:
await litellm.acompletion(
model="o1-preview",
model=model,
max_tokens=10,
messages=[{"role": "system", "content": "Hello!"}],
messages=[{"role": "system", "content": "Be a good bot!"}],
client=client,
)
except Exception as e:
@ -48,9 +53,73 @@ async def test_o1_handle_system_role():
print("request_body: ", request_body)
assert request_body["model"] == "o1-preview"
assert request_body["model"] == model
assert request_body["max_completion_tokens"] == 10
assert request_body["messages"] == [{"role": "user", "content": "Hello!"}]
if supports_system_messages(model, "openai"):
assert request_body["messages"] == [
{"role": "system", "content": "Be a good bot!"}
]
else:
assert request_body["messages"] == [
{"role": "user", "content": "Be a good bot!"}
]
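
The assertion above mirrors what the o1 transformation now does: keep the system message when the model map says the model supports it, otherwise downgrade it to a user message. A hedged sketch of that behaviour (`translate_system_message` is a stand-in, not the actual helper in `o1_transformation.py`):

```python
from typing import List

from litellm.utils import supports_system_messages


def translate_system_message(model: str, messages: List[dict]) -> List[dict]:
    # Per this PR's model-map changes: True for "o1", False for o1-mini / o1-preview.
    if supports_system_messages(model, "openai"):
        return messages
    return [
        {**m, "role": "user"} if m.get("role") == "system" else m
        for m in messages
    ]


msgs = [{"role": "system", "content": "Be a good bot!"}]
print(translate_system_message("o1", msgs))       # role stays "system"
print(translate_system_message("o1-mini", msgs))  # role becomes "user"
```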
@pytest.mark.parametrize(
"model, expected_tool_calling_support",
[("o1-preview", False), ("o1-mini", False), ("o1", True)],
)
@pytest.mark.asyncio
async def test_o1_handle_tool_calling_optional_params(
model, expected_tool_calling_support
):
"""
Tests that tool calling ('tools') is only reported as a supported openai param
for models that support it (o1), and not for o1-preview / o1-mini.
"""
from openai import AsyncOpenAI
from litellm.utils import ProviderConfigManager
from litellm.types.utils import LlmProviders
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
config = ProviderConfigManager.get_provider_chat_config(
model=model, provider=LlmProviders.OPENAI
)
supported_params = config.get_supported_openai_params(model=model)
assert expected_tool_calling_support == ("tools" in supported_params)
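
The same check can be reproduced outside pytest; this simply re-uses the calls from the test above, so the only assumption is that the local model map from this PR is loaded:

```python
from litellm.types.utils import LlmProviders
from litellm.utils import ProviderConfigManager

for model in ("o1-preview", "o1-mini", "o1"):
    config = ProviderConfigManager.get_provider_chat_config(
        model=model, provider=LlmProviders.OPENAI
    )
    supported = config.get_supported_openai_params(model=model)
    # Per this PR: False, False, True
    print(model, "tools" in supported)
```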
# @pytest.mark.parametrize(
# "model",
# ["o1"], # "o1-preview", "o1-mini",
# )
# @pytest.mark.asyncio
# async def test_o1_handle_streaming_e2e(model):
# """
# Tests that:
# - max_tokens is translated to 'max_completion_tokens'
# - role 'system' is translated to 'user'
# """
# from openai import AsyncOpenAI
# from litellm.utils import ProviderConfigManager
# from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
# from litellm.types.utils import LlmProviders
# resp = litellm.completion(
# model=model,
# messages=[{"role": "user", "content": "Hello!"}],
# stream=True,
# )
# assert isinstance(resp, CustomStreamWrapper)
# for chunk in resp:
# print("chunk: ", chunk)
# assert True
@pytest.mark.asyncio

View file

@ -2072,6 +2072,7 @@ def test_openai_chat_completion_complete_response_call():
"azure/chatgpt-v-2",
"claude-3-haiku-20240307",
"o1-preview",
"o1",
"azure/fake-o1-mini",
],
)