Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 19:54:13 +00:00)
* fix(health.md): add rerank model health check information
* build(model_prices_and_context_window.json): add gemini 2.0 for google ai studio - pricing + commercial rate limits
* build(model_prices_and_context_window.json): add gemini-2.0 supports audio output = true
* docs(team_model_add.md): clarify allowing teams to add models is an enterprise feature
* fix(o1_transformation.py): add support for 'n', 'response_format' and 'stop' params for o1, and 'stream_options' param for o1-mini
* build(model_prices_and_context_window.json): add 'supports_system_message' to supporting openai models. Needed as o1-preview and o1-mini models don't support a 'system' message
* fix(o1_transformation.py): translate system message based on if o1 model supports it
* fix(o1_transformation.py): return 'stream' param support if o1-mini/o1-preview. o1 currently doesn't support streaming, but the other model versions do. Fixes https://github.com/BerriAI/litellm/issues/7292
* fix(o1_transformation.py): return tool calling/response_format in supported params if model map says so. Fixes https://github.com/BerriAI/litellm/issues/7292
* fix: fix linting errors
* fix: update '_transform_messages'
* fix(o1_transformation.py): fix provider passed for supported param checks
* test(base_llm_unit_tests.py): skip test if api takes >5s to respond
* fix(utils.py): return false in 'supports_factory' if can't find value
* fix(o1_transformation.py): always return stream + stream_options as supported params, and handle stream options being passed in for azure o1
* feat(openai.py): support stream faking natively in openai handler. Allows o1 calls to be faked for just the "o1" model, allows native streaming for o1-mini, o1-preview. Fixes https://github.com/BerriAI/litellm/issues/7292
* fix(openai.py): use inference param instead of original optional param
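For context, here is a quick sketch of the caller-facing behavior the stream-faking change enables. This is not part of the PR; the deployment name, endpoint, and API version are placeholders, and credentials are assumed to be supplied via litellm's standard AZURE_* environment variables.

import os

import litellm

# Assumed setup: litellm's standard Azure environment variables (values are placeholders).
os.environ["AZURE_API_KEY"] = "sk-..."
os.environ["AZURE_API_BASE"] = "https://my-endpoint.openai.azure.com"
os.environ["AZURE_API_VERSION"] = "2024-12-01-preview"

# For "azure/o1", streaming is faked: the handler fetches the full completion once,
# then replays it as chunks. o1-mini / o1-preview stream natively instead.
response = litellm.completion(
    model="azure/o1",
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,
)

for chunk in response:
    print(chunk.choices[0].delta.content or "", end="")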
99 lines · 2.9 KiB · Python
"""
|
|
Handler file for calls to Azure OpenAI's o1 family of models
|
|
|
|
Written separately to handle faking streaming for o1 models.
|
|
"""
|
|
|
|
import asyncio
|
|
from typing import Any, Callable, List, Optional, Union
|
|
|
|
from httpx._config import Timeout
|
|
|
|
from litellm.litellm_core_utils.litellm_logging import Logging
|
|
from litellm.llms.bedrock.chat.invoke_handler import MockResponseIterator
|
|
from litellm.types.utils import ModelResponse
|
|
from litellm.utils import CustomStreamWrapper
|
|
|
|
from ..azure import AzureChatCompletion
|
|
|
|
|
|
class AzureOpenAIO1ChatCompletion(AzureChatCompletion):
|
|
|
|
async def mock_async_streaming(
|
|
self,
|
|
response: Any,
|
|
model: Optional[str],
|
|
logging_obj: Any,
|
|
):
|
|
model_response = await response
|
|
completion_stream = MockResponseIterator(model_response=model_response)
|
|
streaming_response = CustomStreamWrapper(
|
|
completion_stream=completion_stream,
|
|
model=model,
|
|
custom_llm_provider="azure",
|
|
logging_obj=logging_obj,
|
|
)
|
|
return streaming_response
|
|
|
|
def completion(
|
|
self,
|
|
model: str,
|
|
messages: List,
|
|
model_response: ModelResponse,
|
|
api_key: str,
|
|
api_base: str,
|
|
api_version: str,
|
|
api_type: str,
|
|
azure_ad_token: str,
|
|
dynamic_params: bool,
|
|
print_verbose: Callable[..., Any],
|
|
timeout: Union[float, Timeout],
|
|
logging_obj: Logging,
|
|
optional_params,
|
|
litellm_params,
|
|
logger_fn,
|
|
acompletion: bool = False,
|
|
headers: Optional[dict] = None,
|
|
client=None,
|
|
):
|
|
stream: Optional[bool] = optional_params.pop("stream", False)
|
|
stream_options: Optional[dict] = optional_params.pop("stream_options", None)
|
|
response = super().completion(
|
|
model,
|
|
messages,
|
|
model_response,
|
|
api_key,
|
|
api_base,
|
|
api_version,
|
|
api_type,
|
|
azure_ad_token,
|
|
dynamic_params,
|
|
print_verbose,
|
|
timeout,
|
|
logging_obj,
|
|
optional_params,
|
|
litellm_params,
|
|
logger_fn,
|
|
acompletion,
|
|
headers,
|
|
client,
|
|
)
|
|
|
|
if stream is True:
|
|
if asyncio.iscoroutine(response):
|
|
return self.mock_async_streaming(
|
|
response=response, model=model, logging_obj=logging_obj # type: ignore
|
|
)
|
|
|
|
completion_stream = MockResponseIterator(model_response=response)
|
|
streaming_response = CustomStreamWrapper(
|
|
completion_stream=completion_stream,
|
|
model=model,
|
|
custom_llm_provider="openai",
|
|
logging_obj=logging_obj,
|
|
stream_options=stream_options,
|
|
)
|
|
|
|
return streaming_response
|
|
else:
|
|
return response
|
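The core trick above (MockResponseIterator plus CustomStreamWrapper) is to take an already complete response and replay it through a chunk iterator, so callers can keep iterating as if the model had streamed. Below is a minimal, library-independent sketch of that idea; the names and the chunking strategy are purely illustrative and are not litellm's internals (litellm's MockResponseIterator does not split the text like this).

from dataclasses import dataclass
from typing import Iterator, Optional


@dataclass
class FakeChunk:
    """A stand-in for a streaming delta; real responses carry much more metadata."""
    content: str
    finish_reason: Optional[str] = None  # set only on the final chunk


def fake_stream(full_text: str, chunk_size: int = 16) -> Iterator[FakeChunk]:
    """Replay an already-complete completion as a sequence of chunks."""
    for i in range(0, len(full_text), chunk_size):
        piece = full_text[i : i + chunk_size]
        is_last = i + chunk_size >= len(full_text)
        yield FakeChunk(content=piece, finish_reason="stop" if is_last else None)


if __name__ == "__main__":
    # The non-streaming call has already returned everything; we only pretend to stream.
    completed = "The o1 model returned this answer in a single non-streaming call."
    for chunk in fake_stream(completed):
        print(chunk.content, end="", flush=True)
    print()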