mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 19:24:27 +00:00
* test(azure_openai_o1.py): initial commit with testing for azure openai o1 preview model * fix(base_llm_unit_tests.py): handle azure o1 preview response format tests skip as o1 on azure doesn't support tool calling yet * fix: initial commit of azure o1 handler using openai caller simplifies calling + allows fake streaming logic alr. implemented for openai to just work * feat(azure/o1_handler.py): fake o1 streaming for azure o1 models azure does not currently support streaming for o1 * feat(o1_transformation.py): support overriding 'should_fake_stream' on azure/o1 via 'supports_native_streaming' param on model info enables user to toggle on when azure allows o1 streaming without needing to bump versions * style(router.py): remove 'give feedback/get help' messaging when router is used Prevents noisy messaging Closes https://github.com/BerriAI/litellm/issues/5942 * fix(types/utils.py): handle none logprobs Fixes https://github.com/BerriAI/litellm/issues/328 * fix(exception_mapping_utils.py): fix error str unbound error * refactor(azure_ai/): move to openai_like chat completion handler allows for easy swapping of api base url's (e.g. 
ai.services.com) Fixes https://github.com/BerriAI/litellm/issues/7275 * refactor(azure_ai/): move to base llm http handler * fix(azure_ai/): handle differing api endpoints * fix(azure_ai/): make sure all unit tests are passing * fix: fix linting errors * fix: fix linting errors * fix: fix linting error * fix: fix linting errors * fix(azure_ai/transformation.py): handle extra body param * fix(azure_ai/transformation.py): fix max retries param handling * fix: fix test * test(test_azure_o1.py): fix test * fix(llm_http_handler.py): support handling azure ai unprocessable entity error * fix(llm_http_handler.py): handle sync invalid param error for azure ai * fix(azure_ai/): streaming support with base_llm_http_handler * fix(llm_http_handler.py): working sync stream calls with unprocessable entity handling for azure ai * fix: fix linting errors * fix(llm_http_handler.py): fix linting error * fix(azure_ai/): handle cohere tool call invalid index param error
137 lines
4.2 KiB
Python
137 lines
4.2 KiB
Python
import json
|
|
from abc import abstractmethod
|
|
from typing import Optional, Union
|
|
|
|
from litellm.types.utils import GenericStreamingChunk, ModelResponseStream
|
|
|
|
|
|
class BaseModelResponseIterator:
    """Adapt a raw streaming response into an iterator of parsed chunks.

    Supports both the sync (``__iter__`` / ``__next__``) and the async
    (``__aiter__`` / ``__anext__``) iteration protocols. Every raw item
    pulled from the wrapped stream is decoded when it is ``bytes`` and then
    classified by :meth:`_handle_string_chunk`; ``data:`` payloads are parsed
    as JSON and handed to :meth:`chunk_parser`.

    The base :meth:`chunk_parser` ignores its input and returns an empty,
    unfinished chunk, so provider-specific subclasses presumably override it.
    """

    def __init__(
        self, streaming_response, sync_stream: bool, json_mode: Optional[bool] = False
    ):
        """Store the stream to iterate over.

        Args:
            streaming_response: The underlying stream. For sync iteration it
                must expose ``__next__``; for async iteration it must expose
                ``__aiter__``.
            sync_stream: Accepted as part of the constructor signature; it
                is not read anywhere in this class.
            json_mode: Stored on the instance; not read by this class.
        """
        self.streaming_response = streaming_response
        # The sync path pulls items straight from the response object.
        self.response_iterator = self.streaming_response
        self.json_mode = json_mode

    def chunk_parser(
        self, chunk: dict
    ) -> Union[GenericStreamingChunk, ModelResponseStream]:
        """Convert one decoded JSON payload into a streaming chunk.

        This default implementation ignores ``chunk`` and returns an empty,
        unfinished placeholder.
        """
        return GenericStreamingChunk(
            text="",
            is_finished=False,
            finish_reason="",
            usage=None,
            index=0,
            tool_use=None,
        )

    def _handle_string_chunk(
        self, str_line: str
    ) -> Union[GenericStreamingChunk, ModelResponseStream]:
        """Classify one decoded line and turn it into a chunk.

        * A line containing ``[DONE]`` yields a finished chunk with
          ``finish_reason="stop"``.
        * A line starting with ``data:`` has the prefix removed, is parsed
          as JSON and is passed to :meth:`chunk_parser`.
        * Anything else yields an empty, unfinished chunk.

        Raises:
            ValueError: If the ``data:`` payload is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        # NOTE(review): this is a substring test that runs before the
        # payload is parsed, so a ``data:`` line whose content merely
        # contains the text "[DONE]" is also treated as end-of-stream.
        # Kept as-is to preserve behaviour - confirm whether that is intended.
        if "[DONE]" in str_line:
            return GenericStreamingChunk(
                text="",
                is_finished=True,
                finish_reason="stop",
                usage=None,
                index=0,
                tool_use=None,
            )
        elif str_line.startswith("data:"):
            data_json = json.loads(str_line[5:])  # 5 == len("data:")
            return self.chunk_parser(chunk=data_json)
        else:
            return GenericStreamingChunk(
                text="",
                is_finished=False,
                finish_reason="",
                usage=None,
                index=0,
                tool_use=None,
            )

    def _parse_raw_chunk(self, chunk):
        """Decode a raw stream item and parse it; shared by sync and async paths.

        ``bytes`` items are decoded as UTF-8 and anything preceding the first
        ``data:`` marker is dropped. ``str`` items are passed through as-is.

        Raises:
            RuntimeError: If decoding or parsing raises ``ValueError``. The
                original exception is attached as ``__cause__``.
        """
        try:
            str_line = chunk
            if isinstance(chunk, bytes):  # Handle binary data
                str_line = chunk.decode("utf-8")  # Convert bytes to string
                index = str_line.find("data:")
                if index != -1:
                    str_line = str_line[index:]
            # chunk is a str at this point
            return self._handle_string_chunk(str_line=str_line)
        except ValueError as e:
            raise RuntimeError(
                f"Error parsing chunk: {e},\nReceived chunk: {chunk}"
            ) from e

    # Sync iterator
    def __iter__(self):
        """Return ``self``; the object is its own sync iterator."""
        return self

    def __next__(self):
        """Return the next parsed chunk from the sync stream.

        Raises:
            StopIteration: When the underlying stream is exhausted.
            RuntimeError: If reading or parsing the chunk raises ``ValueError``.
        """
        try:
            chunk = self.response_iterator.__next__()
        except ValueError as e:
            raise RuntimeError(f"Error receiving chunk from stream: {e}") from e
        return self._parse_raw_chunk(chunk)

    # Async iterator
    def __aiter__(self):
        """Create the async iterator over the response and return ``self``."""
        self.async_response_iterator = self.streaming_response.__aiter__()
        return self

    async def __anext__(self):
        """Return the next parsed chunk from the async stream.

        ``__aiter__`` must have been called first (``async for`` does this),
        because it is what creates ``async_response_iterator``.

        Raises:
            StopAsyncIteration: When the underlying stream is exhausted.
            RuntimeError: If reading or parsing the chunk raises ``ValueError``.
        """
        try:
            chunk = await self.async_response_iterator.__anext__()
        except ValueError as e:
            raise RuntimeError(f"Error receiving chunk from stream: {e}") from e
        return self._parse_raw_chunk(chunk)
|
|
|
|
|
|
class FakeStreamResponseIterator:
    """Present one complete model response as a single-item stream.

    The stored response is passed to :meth:`chunk_parser` exactly once, on
    the first ``__next__`` / ``__anext__`` call; every later call signals
    exhaustion. Sync and async iteration share the same ``is_done`` flag.
    """

    def __init__(self, model_response, json_mode: Optional[bool] = False):
        """Remember the response to emit and mark the stream as not yet consumed."""
        self.is_done = False
        self.json_mode = json_mode
        self.model_response = model_response

    # NOTE: this class does not use an ABC metaclass, so the decorator below
    # does not prevent instantiation; without an override this returns None.
    @abstractmethod
    def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
        """Convert the stored response into the chunk that gets emitted."""

    # Sync iterator
    def __iter__(self):
        """Return ``self``; the object is its own sync iterator."""
        return self

    def __next__(self):
        """Emit the parsed response once, then raise ``StopIteration``."""
        if not self.is_done:
            # Flip the flag before parsing so the response is attempted once only.
            self.is_done = True
            return self.chunk_parser(self.model_response)
        raise StopIteration

    # Async iterator
    def __aiter__(self):
        """Return ``self``; the object is its own async iterator."""
        return self

    async def __anext__(self):
        """Emit the parsed response once, then raise ``StopAsyncIteration``."""
        if not self.is_done:
            # Flip the flag before parsing so the response is attempted once only.
            self.is_done = True
            return self.chunk_parser(self.model_response)
        raise StopAsyncIteration
|