forked from phoenix/litellm-mirror
* Fix Vertex AI function calling invoke: use JSON format instead of protobuf text format. (#6702) * test: test tool_call conversion when arguments is empty dict Fixes https://github.com/BerriAI/litellm/issues/6833 * fix(openai_like/handler.py): return more descriptive error message Fixes https://github.com/BerriAI/litellm/issues/6812 * test: skip overloaded model * docs(anthropic.md): update anthropic docs to show how to route to any new model * feat(groq/): fake stream when 'response_format' param is passed Groq doesn't support streaming when response_format is set * feat(groq/): add response_format support for groq Closes https://github.com/BerriAI/litellm/issues/6845 * fix(o1_handler.py): remove fake streaming for o1 Closes https://github.com/BerriAI/litellm/issues/6801 * build(model_prices_and_context_window.json): add groq llama3.2b model pricing Closes https://github.com/BerriAI/litellm/issues/6807 * fix(utils.py): fix handling ollama response format param Fixes https://github.com/BerriAI/litellm/issues/6848#issuecomment-2491215485 * docs(sidebars.js): refactor chat endpoint placement * fix: fix linting errors * test: fix test * test: fix test * fix(openai_like/handler): handle max retries * fix(streaming_handler.py): fix streaming check for openai-compatible providers * test: update test * test: correctly handle model is overloaded error * test: update test * test: fix test * test: mark flaky test --------- Co-authored-by: Guowang Li <Guowang@users.noreply.github.com>
63 lines
1.8 KiB
Python
63 lines
1.8 KiB
Python
"""
|
|
Handler file for calls to OpenAI's o1 family of models
|
|
|
|
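Originally written separately to fake streaming for o1 models; fake streaming has since been removed, and this handler now simply delegates to the parent OpenAI chat completion handler.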
|
|
"""
|
|
|
|
import asyncio
|
|
from typing import Any, Callable, List, Optional, Union
|
|
|
|
from httpx._config import Timeout
|
|
|
|
from litellm.llms.bedrock.chat.invoke_handler import MockResponseIterator
|
|
from litellm.llms.OpenAI.openai import OpenAIChatCompletion
|
|
from litellm.types.utils import ModelResponse
|
|
from litellm.utils import CustomStreamWrapper
|
|
|
|
|
|
class OpenAIO1ChatCompletion(OpenAIChatCompletion):
    """Chat-completion handler for OpenAI's o1 family of models.

    The only behavior defined here is ``completion``, which forwards its
    arguments to ``OpenAIChatCompletion.completion`` and returns the result.
    """

    def completion(
        self,
        model_response: ModelResponse,
        timeout: Union[float, Timeout],
        optional_params: dict,
        logging_obj: Any,
        model: Optional[str] = None,
        messages: Optional[list] = None,
        print_verbose: Optional[Callable[..., Any]] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        acompletion: bool = False,
        litellm_params=None,
        logger_fn=None,
        headers: Optional[dict] = None,
        custom_prompt_dict: Optional[dict] = None,
        client=None,
        organization: Optional[str] = None,
        custom_llm_provider: Optional[str] = None,
        drop_params: Optional[bool] = None,
    ):
        """Run a completion by delegating to the parent handler.

        Every argument is passed through to ``super().completion``
        positionally, in declaration order, so this signature must stay
        aligned with the parent's. ``optional_params`` is forwarded
        untouched; no stream-specific handling happens in this method.

        Args:
            custom_prompt_dict: Forwarded to the parent. When omitted (or
                ``None``) a fresh empty dict is used, so no dict object is
                shared between calls.

        All other parameters are forwarded unchanged.

        Returns:
            Whatever ``OpenAIChatCompletion.completion`` returns.
        """
        # A `{}` default would be created once and shared by every call;
        # build a new dict per call instead.
        if custom_prompt_dict is None:
            custom_prompt_dict = {}

        return super().completion(
            model_response,
            timeout,
            optional_params,
            logging_obj,
            model,
            messages,
            print_verbose,
            api_key,
            api_base,
            acompletion,
            litellm_params,
            logger_fn,
            headers,
            custom_prompt_dict,
            client,
            organization,
            custom_llm_provider,
            drop_params,
        )
|