LiteLLM Minor Fixes & Improvements (09/24/2024) (#5880)

* LiteLLM Minor Fixes & Improvements (09/23/2024) (#5842)

* feat(auth_utils.py): enable admin to allow client-side credentials to be passed

Makes it easier for devs to experiment with fine-tuned Fireworks AI models

* feat(router.py): allow setting configurable_clientside_auth_params for a model

Closes https://github.com/BerriAI/litellm/issues/5843
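
A minimal sketch of how this might look on the router; the placement of `configurable_clientside_auth_params` under `litellm_params`, the model name, and the param list below are assumptions, not the confirmed config shape:

```python
from litellm import Router

# Sketch (assumed config shape): allow callers of this deployment to pass
# their own api_key/api_base at request time.
router = Router(
    model_list=[
        {
            "model_name": "fireworks-llama",  # illustrative alias
            "litellm_params": {
                "model": "fireworks_ai/accounts/fireworks/models/llama-v3p1-8b-instruct",
                "configurable_clientside_auth_params": ["api_key", "api_base"],
            },
        }
    ]
)
```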

* build(model_prices_and_context_window.json): fix anthropic claude-3-5-sonnet max output token limit

Fixes https://github.com/BerriAI/litellm/issues/5850
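
One way to sanity-check the corrected limit from the pricing map (a sketch using litellm's public model-map lookup; the expected value assumes Anthropic's documented 8192-token output ceiling):

```python
import litellm

# After the fix, the registered output-token ceiling for claude-3-5-sonnet
# should match Anthropic's documented limit (8192).
info = litellm.get_model_info("claude-3-5-sonnet-20240620")
print(info["max_output_tokens"])
```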

* fix(azure_ai/): support content list for azure ai

Fixes https://github.com/BerriAI/litellm/issues/4237
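
For example, an OpenAI-style content list should now be transformed correctly for `azure_ai/` chat models; the deployment name below is hypothetical:

```python
import litellm

response = litellm.completion(
    model="azure_ai/my-chat-deployment",  # hypothetical deployment name
    messages=[
        {
            "role": "user",
            # "content" as a list of parts, rather than a plain string
            "content": [{"type": "text", "text": "What is the capital of France?"}],
        }
    ],
)
print(response.choices[0].message.content)
```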

* fix(litellm_logging.py): always set saved_cache_cost

Set to 0 by default

* fix(fireworks_ai/cost_calculator.py): add fireworks ai default pricing

Handles cost tracking for calls to 405B+ parameter models not individually listed in the pricing map

* fix(slack_alerting.py): fix error alerting for failed spend tracking

Fixes a regression in Slack alerting error monitoring

* fix(vertex_and_google_ai_studio_gemini.py): handle Gemini streaming chunks that contain no candidates

* docs(bedrock.md): add llama3-1 models

* test: fix tests

* fix(azure_ai/chat): fix transformation for azure ai calls

* feat(azure_ai/embed): Add azure ai embeddings support

Closes https://github.com/BerriAI/litellm/issues/5861
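
A minimal usage sketch; the deployment name, endpoint, and key below are placeholders:

```python
import litellm

response = litellm.embedding(
    model="azure_ai/Cohere-embed-v3-english",  # placeholder deployment name
    input=["good morning from litellm"],
    api_base="https://my-endpoint.eastus2.inference.ai.azure.com",  # placeholder
    api_key="my-azure-ai-api-key",  # placeholder
)
print(len(response.data[0]["embedding"]))
```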

* fix(azure_ai/embed): enable async embedding

* feat(azure_ai/embed): support azure ai multimodal embeddings

* fix(azure_ai/embed): support async multimodal embeddings
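
And the async counterpart, a sketch assuming `AZURE_AI_API_BASE` and `AZURE_AI_API_KEY` are set in the environment; the multimodal deployment name is hypothetical, and input format follows whatever the deployed model expects:

```python
import asyncio

import litellm

async def main():
    # Async embedding now routes through the azure_ai handler as well.
    response = await litellm.aembedding(
        model="azure_ai/Cohere-embed-v3-multimodal",  # hypothetical deployment
        input=["hello world"],
    )
    print(response.usage)

asyncio.run(main())
```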

* feat(together_ai/embed): support together ai embedding calls
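
For example (a sketch; assumes `TOGETHERAI_API_KEY` is set and that the model below is one of Together AI's hosted embedding models):

```python
import litellm

response = litellm.embedding(
    model="together_ai/togethercomputer/m2-bert-80M-8k-retrieval",
    input=["hello from litellm"],
)
print(len(response.data[0]["embedding"]))
```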

* feat(rerank/main.py): log source documents for rerank endpoints to langfuse

Improves rerank endpoint logging
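
A sketch of a rerank call whose source `documents` would now appear in the trace, assuming the langfuse callback and credentials are configured:

```python
import litellm

litellm.success_callback = ["langfuse"]

response = litellm.rerank(
    model="cohere/rerank-english-v3.0",
    query="What is the capital of France?",
    documents=["Paris is the capital of France.", "Berlin is in Germany."],
    top_n=1,
)
print(response.results)
```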

* fix(langfuse.py): support logging `/audio/speech` input to langfuse
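
For example (a sketch; assumes langfuse credentials are set via environment variables):

```python
import litellm

litellm.success_callback = ["langfuse"]

# The `input` text below is now captured in the langfuse trace.
response = litellm.speech(
    model="openai/tts-1",
    voice="alloy",
    input="the quick brown fox jumped over the lazy dog",
)
response.stream_to_file("speech.mp3")
```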

* test(test_embedding.py): fix test

* test(test_completion_cost.py): fix helper util
Author: Krish Dholakia
Date: 2024-09-25 22:11:57 -07:00 (committed by GitHub)
Parent: 5bc5eaff8a
Commit: 16c0307eab
25 changed files with 1675 additions and 340 deletions


@@ -83,7 +83,7 @@ from .llms import (
 from .llms.AI21 import completion as ai21
 from .llms.anthropic.chat import AnthropicChatCompletion
 from .llms.anthropic.completion import AnthropicTextCompletion
-from .llms.azure_ai.chat.handler import AzureAIChatCompletion
+from .llms.azure_ai import AzureAIChatCompletion, AzureAIEmbedding
 from .llms.azure_text import AzureTextCompletion
 from .llms.AzureOpenAI.audio_transcriptions import AzureAudioTranscription
 from .llms.AzureOpenAI.azure import AzureChatCompletion, _check_dynamic_azure_params
@@ -168,6 +168,7 @@ openai_o1_chat_completions = OpenAIO1ChatCompletion()
 openai_audio_transcriptions = OpenAIAudioTranscription()
 databricks_chat_completions = DatabricksChatCompletion()
 azure_ai_chat_completions = AzureAIChatCompletion()
+azure_ai_embedding = AzureAIEmbedding()
 anthropic_chat_completions = AnthropicChatCompletion()
 anthropic_text_completions = AnthropicTextCompletion()
 azure_chat_completions = AzureChatCompletion()
@@ -3215,6 +3216,8 @@ async def aembedding(*args, **kwargs) -> EmbeddingResponse:
         or custom_llm_provider == "cohere"
         or custom_llm_provider == "huggingface"
         or custom_llm_provider == "bedrock"
+        or custom_llm_provider == "azure_ai"
+        or custom_llm_provider == "together_ai"
     ):  # currently implemented aiohttp calls for just azure and openai, soon all.
         # Await normally
         init_response = await loop.run_in_executor(None, func_with_context)
@@ -3385,6 +3388,9 @@ def embedding(
         api_base=api_base,
         api_key=api_key,
     )
+    if dynamic_api_key is not None:
+        api_key = dynamic_api_key
+
     optional_params = get_optional_params_embeddings(
         model=model,
         user=user,
@@ -3481,7 +3487,9 @@ def embedding(
             aembedding=aembedding,
         )
     elif (
-        model in litellm.open_ai_embedding_models or custom_llm_provider == "openai"
+        model in litellm.open_ai_embedding_models
+        or custom_llm_provider == "openai"
+        or custom_llm_provider == "together_ai"
     ):
         api_base = (
             api_base
@@ -3832,6 +3840,33 @@ def embedding(
             model_response=EmbeddingResponse(),
             aembedding=aembedding,
         )
+    elif custom_llm_provider == "azure_ai":
+        api_base = (
+            api_base  # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there
+            or litellm.api_base
+            or get_secret("AZURE_AI_API_BASE")
+        )
+        # set API KEY
+        api_key = (
+            api_key
+            or litellm.api_key  # for deepinfra/perplexity/anyscale/friendliai we check in get_llm_provider and pass in the api key from there
+            or litellm.openai_key
+            or get_secret("AZURE_AI_API_KEY")
+        )
+
+        ## EMBEDDING CALL
+        response = azure_ai_embedding.embedding(
+            model=model,
+            input=input,
+            api_base=api_base,
+            api_key=api_key,
+            logging_obj=logging,
+            timeout=timeout,
+            model_response=EmbeddingResponse(),
+            optional_params=optional_params,
+            client=client,
+            aembedding=aembedding,
+        )
     else:
         args = locals()
         raise ValueError(f"No valid embedding model args passed in - {args}")
@@ -4901,7 +4936,11 @@ def speech(
     aspeech: Optional[bool] = None,
     **kwargs,
 ) -> HttpxBinaryResponseContent:
     user = kwargs.get("user", None)
+    litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
+    proxy_server_request = kwargs.get("proxy_server_request", None)
+    model_info = kwargs.get("model_info", None)
+    metadata = kwargs.get("metadata", {})
     model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base)  # type: ignore
     tags = kwargs.pop("tags", [])
@@ -4918,6 +4957,21 @@ def speech(
     max_retries = litellm.num_retries or openai.DEFAULT_MAX_RETRIES
     logging_obj = kwargs.get("litellm_logging_obj", None)
+    logging_obj.update_environment_variables(
+        model=model,
+        user=user,
+        optional_params={},
+        litellm_params={
+            "litellm_call_id": litellm_call_id,
+            "proxy_server_request": proxy_server_request,
+            "model_info": model_info,
+            "metadata": metadata,
+            "preset_cache_key": None,
+            "stream_response": {},
+            **kwargs,
+        },
+        custom_llm_provider=custom_llm_provider,
+    )
     response: Optional[HttpxBinaryResponseContent] = None
     if custom_llm_provider == "openai":
         if voice is None or not (isinstance(voice, str)):