Litellm dev 2024 12 20 p1 (#7335)
* fix(utils.py): get e2e Azure TTS cost tracking working; move the TTS response object to include hidden params (allows litellm call id, etc. to be sent in response headers); fix the spend_tracking_utils logging payload to account for the non-base-model use case. Fixes https://github.com/BerriAI/litellm/issues/7223
* fix: fix linting errors
* build(model_prices_and_context_window.json): add Bedrock Llama 3.3. Closes https://github.com/BerriAI/litellm/issues/7329
* fix(openai.py): fix return type for the sync OpenAI httpx response
* test: update test
* fix(spend_tracking_utils.py): fix if check
* fix(spend_tracking_utils.py): fix if check
* test: improve debugging for test
* fix: fix import
This commit is contained in:
parent 522da384b6
commit 404bf2974b

12 changed files with 63 additions and 36 deletions
.gitignore (vendored): 1 addition

@@ -66,3 +66,4 @@ litellm/tests/langfuse.log
 litellm/tests/langfuse.log
 litellm/proxy/google-cloud-sdk/*
 tests/llm_translation/log.txt
+venv/
Azure handler (class AzureChatCompletion): both audio-speech paths now return litellm's HttpxBinaryResponseContent wrapper instead of the raw SDK object.

@@ -1386,7 +1386,7 @@ class AzureChatCompletion(BaseLLM):
             input=input,
             **optional_params,
         )
-        return response
+        return HttpxBinaryResponseContent(response=response.response)
 
     async def async_audio_speech(
         self,
@@ -1415,14 +1415,14 @@ class AzureChatCompletion(BaseLLM):
             client_type="async",
         )  # type: ignore
 
-        response = await azure_client.audio.speech.create(
+        azure_response = await azure_client.audio.speech.create(
             model=model,
             voice=voice,  # type: ignore
             input=input,
             **optional_params,
         )
 
-        return response
+        return HttpxBinaryResponseContent(response=azure_response.response)
 
     def get_headers(
         self,
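Both hunks re-wrap the SDK's legacy binary response in litellm's own HttpxBinaryResponseContent subclass (defined later in this commit, in litellm/types/llms/openai.py). A minimal sketch of the pattern, assuming the openai legacy response exposes the raw httpx.Response via its `.response` attribute; the names WrappedBinaryContent and wrap, and the call-id value, are illustrative, not litellm's:

# Minimal sketch of the re-wrap pattern above (names are ours, not litellm's).
# Re-wrapping the raw httpx.Response in a subclass yields an object that
# behaves identically to the SDK's, but can carry extra litellm metadata.
from openai._legacy_response import HttpxBinaryResponseContent as _Upstream


class WrappedBinaryContent(_Upstream):
    _hidden_params: dict = {}


def wrap(sdk_result: _Upstream, call_id: str) -> WrappedBinaryContent:
    wrapped = WrappedBinaryContent(response=sdk_result.response)
    wrapped._hidden_params = {"litellm_call_id": call_id}  # illustrative key
    return wrapped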
Azure files handler (class AzureOpenAIFilesAPI): imports cast and applies the same wrapper to file-content responses.

@@ -1,4 +1,4 @@
-from typing import Any, Coroutine, Optional, Union
+from typing import Any, Coroutine, Optional, Union, cast
 
 import httpx
 from openai import AsyncAzureOpenAI, AzureOpenAI
@@ -111,7 +111,7 @@ class AzureOpenAIFilesAPI(BaseLLM):
         openai_client: AsyncAzureOpenAI,
     ) -> HttpxBinaryResponseContent:
         response = await openai_client.files.content(**file_content_request)
-        return response
+        return HttpxBinaryResponseContent(response=response.response)
 
     def file_content(
         self,
@@ -152,9 +152,11 @@ class AzureOpenAIFilesAPI(BaseLLM):
                 file_content_request=file_content_request,
                 openai_client=openai_client,
             )
-        response = openai_client.files.content(**file_content_request)
+        response = cast(AzureOpenAI, openai_client).files.content(
+            **file_content_request
+        )
 
-        return response
+        return HttpxBinaryResponseContent(response=response.response)
 
     async def aretrieve_file(
         self,
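The `cast(...)` calls here and below are purely for the type checker: the client-getter helpers are annotated as returning a union of sync and async clients, so the static type must be narrowed before calling sync-only methods. A self-contained illustration, with stand-in classes that are ours rather than litellm's:

# Stand-ins illustrating why cast() is needed; cast() has no runtime effect,
# it only tells the type checker which member of the Union you hold.
from typing import Union, cast


class SyncClient:
    def content(self) -> bytes:
        return b"file-bytes"


class AsyncClient:
    async def content(self) -> bytes:
        return b"file-bytes"


def get_client(is_async: bool) -> Union[SyncClient, AsyncClient]:
    return AsyncClient() if is_async else SyncClient()


client = get_client(is_async=False)
data = cast(SyncClient, client).content()  # the checker now allows the sync call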
litellm/llms/openai/openai.py:

@@ -1253,13 +1253,13 @@ class OpenAIChatCompletion(BaseLLM):
             client=client,
         )
 
-        response = openai_client.audio.speech.create(
+        response = cast(OpenAI, openai_client).audio.speech.create(
             model=model,
             voice=voice,  # type: ignore
             input=input,
             **optional_params,
         )
-        return response  # type: ignore
+        return HttpxBinaryResponseContent(response=response.response)
 
     async def async_audio_speech(
         self,
@@ -1276,13 +1276,16 @@ class OpenAIChatCompletion(BaseLLM):
         client=None,
     ) -> HttpxBinaryResponseContent:
 
-        openai_client = self._get_openai_client(
-            is_async=True,
-            api_key=api_key,
-            api_base=api_base,
-            timeout=timeout,
-            max_retries=max_retries,
-            client=client,
-        )
+        openai_client = cast(
+            AsyncOpenAI,
+            self._get_openai_client(
+                is_async=True,
+                api_key=api_key,
+                api_base=api_base,
+                timeout=timeout,
+                max_retries=max_retries,
+                client=client,
+            ),
+        )
 
         response = await openai_client.audio.speech.create(
@@ -1292,7 +1295,7 @@ class OpenAIChatCompletion(BaseLLM):
             **optional_params,
         )
 
-        return response
+        return HttpxBinaryResponseContent(response=response.response)
 
     async def ahealth_check(
         self,
@@ -1477,7 +1480,7 @@ class OpenAIFilesAPI(BaseLLM):
         openai_client: AsyncOpenAI,
     ) -> HttpxBinaryResponseContent:
         response = await openai_client.files.content(**file_content_request)
-        return response
+        return HttpxBinaryResponseContent(response=response.response)
 
     def file_content(
         self,
@@ -1515,9 +1518,9 @@ class OpenAIFilesAPI(BaseLLM):
                 file_content_request=file_content_request,
                 openai_client=openai_client,
             )
-        response = openai_client.files.content(**file_content_request)
+        response = cast(OpenAI, openai_client).files.content(**file_content_request)
 
-        return response
+        return HttpxBinaryResponseContent(response=response.response)
 
     async def aretrieve_file(
         self,
model_prices_and_context_window.json:

@@ -5986,6 +5986,15 @@
         "litellm_provider": "bedrock",
         "mode": "embedding"
     },
+    "meta.llama3-3-70b-instruct-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000072,
+        "output_cost_per_token": 0.00000072,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
     "meta.llama2-13b-chat-v1": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
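At $0.00000072 per token each way (i.e. $0.72 per million tokens), a call with 1,000 input and 500 output tokens costs 1,500 x $0.00000072, roughly $0.00108. A sketch of the arithmetic; the helper name is ours, while litellm's own cost tracking reads these fields from the JSON:

# Hedged sketch: cost arithmetic from the pricing entry above.
INPUT_COST_PER_TOKEN = 0.00000072   # USD, from the JSON entry
OUTPUT_COST_PER_TOKEN = 0.00000072  # USD, from the JSON entry


def llama33_bedrock_cost(prompt_tokens: int, completion_tokens: int) -> float:
    return (
        prompt_tokens * INPUT_COST_PER_TOKEN
        + completion_tokens * OUTPUT_COST_PER_TOKEN
    )


print(llama33_bedrock_cost(1000, 500))  # ~0.00108 USD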
Proxy test config (yaml). The azure-tts test entry, including a hard-coded key and endpoint, is removed:

@@ -12,12 +12,6 @@ model_list:
       model: openai/gpt-4o
       api_key: os.environ/OPENAI_API_KEY
       num_retries: 3
-  - model_name: azure-tts
-    litellm_params:
-      model: azure/tts-1
-      api_key: "6a4ULQtTSBZi5TONpzGP2GkKDUSSReHFQYwjMpZaywNxY03FsplGJQQJ99ALACHrzpqXJ3w3AAAAACOGUsjy"
-      api_base: "https://krris-m4rve6fd-northcentralus.cognitiveservices.azure.com/openai/deployments/tts"
-      api_version: "2024-05-01-preview"
 
 litellm_settings:
   success_callback: ["langfuse"]
spend_tracking_utils.py:

@@ -1,7 +1,7 @@
 import json
 import secrets
 from datetime import datetime as dt
-from typing import Optional
+from typing import Optional, cast
 
 from pydantic import BaseModel
 
@@ -40,7 +40,9 @@ def get_logging_payload(
 
     if kwargs is None:
         kwargs = {}
-    if response_obj is None:
+    if response_obj is None or (
+        not isinstance(response_obj, BaseModel) and not isinstance(response_obj, dict)
+    ):
         response_obj = {}
     # standardize this function to be used across, s3, dynamoDB, langfuse logging
     litellm_params = kwargs.get("litellm_params", {})
@@ -50,10 +52,10 @@ def get_logging_payload(
     completion_start_time = kwargs.get("completion_start_time", end_time)
     call_type = kwargs.get("call_type")
     cache_hit = kwargs.get("cache_hit", False)
-    usage = response_obj.get("usage", None) or {}
+    usage = cast(dict, response_obj).get("usage", None) or {}
     if isinstance(usage, litellm.Usage):
         usage = dict(usage)
-    id = response_obj.get("id") or kwargs.get("litellm_call_id")
+    id = cast(dict, response_obj).get("id") or kwargs.get("litellm_call_id")
     api_key = metadata.get("user_api_key", "")
     if api_key is not None and isinstance(api_key, str):
         if api_key.startswith("sk-"):
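The widened guard matters for the TTS use case above: an audio/speech call hands the logger a binary response object, not a dict or a pydantic model, and `.get()` on such an object raises AttributeError. A self-contained illustration of the guard's effect; the BinaryResponse stand-in is ours:

# Why the guard matters: non-dict, non-BaseModel responses fall back to {}.
from pydantic import BaseModel


class BinaryResponse:  # stand-in for a TTS HttpxBinaryResponseContent
    pass


response_obj = BinaryResponse()
if response_obj is None or (
    not isinstance(response_obj, BaseModel) and not isinstance(response_obj, dict)
):
    response_obj = {}  # fall back to an empty payload instead of crashing later

print(response_obj.get("usage", None) or {})  # {} (no crash)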
Rerank API entrypoint:

@@ -12,7 +12,7 @@ from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
 from litellm.llms.jina_ai.rerank.handler import JinaAIRerank
 from litellm.llms.together_ai.rerank.handler import TogetherAIRerank
 from litellm.rerank_api.rerank_utils import get_optional_rerank_params
-from litellm.secret_managers.main import get_secret
+from litellm.secret_managers.main import get_secret, get_secret_str
 from litellm.types.rerank import OptionalRerankParams, RerankResponse
 from litellm.types.router import *
 from litellm.utils import ProviderConfigManager, client, exception_type
@@ -211,7 +211,7 @@ def rerank( # noqa: PLR0915
             dynamic_api_base
             or optional_params.api_base
             or litellm.api_base
-            or get_secret("INFINITY_API_BASE")  # type: ignore
+            or get_secret_str("INFINITY_API_BASE")
         )
 
         if api_base is None:
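This is the "fix: fix import" from the commit message: `get_secret` is annotated broadly (secret lookups can yield non-string values), so the call site needed a `# type: ignore`, while `get_secret_str` narrows the result to `Optional[str]`. A minimal sketch of the pattern with stand-in implementations that are ours, not litellm's:

import os
from typing import Optional, Union


def get_secret(name: str) -> Union[str, bool, None]:  # stand-in, broad type
    return os.environ.get(name)


def get_secret_str(name: str) -> Optional[str]:  # stand-in, str-narrowed variant
    value = get_secret(name)
    return value if isinstance(value, str) else None


api_base: Optional[str] = get_secret_str("INFINITY_API_BASE")  # no ignore needed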
litellm/types/llms/openai.py:

@@ -1,7 +1,9 @@
 from os import PathLike
 from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union
 
-from openai._legacy_response import HttpxBinaryResponseContent
+from openai._legacy_response import (
+    HttpxBinaryResponseContent as _HttpxBinaryResponseContent,
+)
 from openai.lib.streaming._assistants import (
     AssistantEventHandler,
     AssistantStreamManager,
@@ -48,6 +50,11 @@ FileTypes = Union[
 EmbeddingInput = Union[str, List[str]]
 
 
+class HttpxBinaryResponseContent(_HttpxBinaryResponseContent):
+    _hidden_params: dict = {}
+    pass
+
+
 class NotGiven:
     """
     A sentinel singleton class used to distinguish omitted keyword arguments
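Because the new class subclasses the openai type under an alias, every isinstance check written against the SDK's class keeps passing, while litellm gains a slot for request metadata. A quick demonstration, constructing from a bare httpx.Response purely for illustration:

import httpx
from openai._legacy_response import HttpxBinaryResponseContent as _Upstream


class HttpxBinaryResponseContent(_Upstream):
    _hidden_params: dict = {}


obj = HttpxBinaryResponseContent(response=httpx.Response(200))
assert isinstance(obj, _Upstream)  # existing SDK-typed checks still pass
obj._hidden_params = {"litellm_call_id": "demo"}  # per-instance metadata

Note that `_hidden_params: dict = {}` is a class-level default, shared across instances until one assigns its own dict, as the last line above does.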
The same entry is mirrored in the backup copy of the pricing file:

@@ -5986,6 +5986,15 @@
         "litellm_provider": "bedrock",
         "mode": "embedding"
     },
+    "meta.llama3-3-70b-instruct-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000072,
+        "output_cost_per_token": 0.00000072,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
     "meta.llama2-13b-chat-v1": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
Vertex AI test:

@@ -210,7 +210,7 @@ async def test_get_router_response():
 #     reason="Local test. Vertex AI Quota is low. Leads to rate limit errors on ci/cd."
 # )
 # @pytest.mark.flaky(retries=3, delay=1)
-def test_vertex_ai_anthropic():
+def test_aavertex_ai_anthropic():
     model = "claude-3-sonnet@20240229"
 
     vertex_ai_project = "adroit-crow-413218"
Audio speech test (the assertion now imports the wrapper from its new home in litellm.types.llms.openai):

@@ -61,7 +61,7 @@ async def test_audio_speech_litellm(sync_mode, model, api_base, api_key):
             optional_params={},
         )
 
-        from litellm.llms.openai.openai import HttpxBinaryResponseContent
+        from litellm.types.llms.openai import HttpxBinaryResponseContent
 
         assert isinstance(response, HttpxBinaryResponseContent)
     else:
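End to end, the user-visible contract the test pins down is that `litellm.speech(...)` returns litellm's `HttpxBinaryResponseContent`. A hedged usage sketch; the model, voice, and environment values are assumptions:

import litellm

# Assumes OPENAI_API_KEY is set; model and voice values are illustrative.
response = litellm.speech(
    model="openai/tts-1",
    voice="alloy",
    input="Hello from litellm",
)
response.stream_to_file("speech.mp3")  # inherited from the openai legacy response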