mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
(Fixes) OpenAI Streaming Token Counting + Fixes usage tracking when litellm.turn_off_message_logging=True
(#8156)
* working streaming usage tracking * fix test_async_chat_openai_stream_options * fix await asyncio.sleep(1) * test_async_chat_azure * fix s3 logging * fix get_stream_options * fix get_stream_options * fix streaming handler * test_stream_token_counting_with_redaction * fix codeql concern
This commit is contained in:
parent
9f0f2b3f01
commit
2cf0daa31c
8 changed files with 268 additions and 94 deletions
|
@ -14,6 +14,7 @@ from typing import (
|
|||
Union,
|
||||
cast,
|
||||
)
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import httpx
|
||||
import openai
|
||||
|
@ -833,8 +834,9 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
stream_options: Optional[dict] = None,
|
||||
):
|
||||
data["stream"] = True
|
||||
if stream_options is not None:
|
||||
data["stream_options"] = stream_options
|
||||
data.update(
|
||||
self.get_stream_options(stream_options=stream_options, api_base=api_base)
|
||||
)
|
||||
|
||||
openai_client: OpenAI = self._get_openai_client( # type: ignore
|
||||
is_async=False,
|
||||
|
@ -893,8 +895,9 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
):
|
||||
response = None
|
||||
data["stream"] = True
|
||||
if stream_options is not None:
|
||||
data["stream_options"] = stream_options
|
||||
data.update(
|
||||
self.get_stream_options(stream_options=stream_options, api_base=api_base)
|
||||
)
|
||||
for _ in range(2):
|
||||
try:
|
||||
openai_aclient: AsyncOpenAI = self._get_openai_client( # type: ignore
|
||||
|
@ -977,6 +980,20 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
status_code=500, message=f"{str(e)}", headers=error_headers
|
||||
)
|
||||
|
||||
def get_stream_options(
    self, stream_options: Optional[dict], api_base: Optional[str]
) -> dict:
    """
    Build the `stream_options` entry to merge into an OpenAI request payload.

    Args:
        stream_options: Caller-supplied stream options; forwarded verbatim
            when provided.
        api_base: Target API base URL, or None for the default endpoint.

    Returns:
        A dict to `update()` into the request data: the caller's options,
        a default `{"include_usage": True}` for api.openai.com (or an
        unspecified base), or an empty dict otherwise.
    """
    # Explicit caller options always win — pass them through untouched.
    if stream_options is not None:
        return {"stream_options": stream_options}

    # No options given: litellm defaults to requesting usage in the stream,
    # but only for the official OpenAI endpoint (or when no base is set).
    hostname = urlparse(api_base).hostname if api_base is not None else None
    if api_base is None or hostname == "api.openai.com":
        return {"stream_options": {"include_usage": True}}

    # Non-OpenAI base (proxy, Azure, etc.): add nothing.
    return {}
|
||||
|
||||
# Embedding
|
||||
@track_llm_api_timing()
|
||||
async def make_openai_embedding_request(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue