Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)
LiteLLM Minor Fixes & Improvements (09/24/2024) (#5880)
* LiteLLM Minor Fixes & Improvements (09/23/2024) (#5842)
* feat(auth_utils.py): enable admin to allow client-side credentials to be passed; makes it easier for devs to experiment with finetuned fireworks ai models
* feat(router.py): allow setting configurable_clientside_auth_params for a model. Closes https://github.com/BerriAI/litellm/issues/5843
* build(model_prices_and_context_window.json): fix anthropic claude-3-5-sonnet max output token limit. Fixes https://github.com/BerriAI/litellm/issues/5850
* fix(azure_ai/): support content list for azure ai. Fixes https://github.com/BerriAI/litellm/issues/4237
* fix(litellm_logging.py): always set saved_cache_cost; 0 by default
* fix(fireworks_ai/cost_calculator.py): add fireworks ai default pricing; handles calling 405b+ size models
* fix(slack_alerting.py): fix error alerting for failed spend tracking; fixes regression with slack alerting error monitoring
* fix(vertex_and_google_ai_studio_gemini.py): handle gemini "no candidates in streaming chunk" error
* docs(bedrock.md): add llama3-1 models
* test: fix tests
* fix(azure_ai/chat): fix transformation for azure ai calls
* feat(azure_ai/embed): add azure ai embeddings support. Closes https://github.com/BerriAI/litellm/issues/5861
* fix(azure_ai/embed): enable async embedding
* feat(azure_ai/embed): support azure ai multimodal embeddings
* fix(azure_ai/embed): support async multimodal embeddings
* feat(together_ai/embed): support together ai embedding calls
* feat(rerank/main.py): log source documents for rerank endpoints to langfuse; improves rerank endpoint logging
* fix(langfuse.py): support logging `/audio/speech` input to langfuse
* test(test_embedding.py): fix test
* test(test_completion_cost.py): fix helper util
Parent: 5bc5eaff8a · Commit: 16c0307eab
25 changed files with 1675 additions and 340 deletions
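The router piece of this commit lets a proxy admin whitelist, per model, which auth params a client may pass through. A minimal sketch of that configuration, assuming the `configurable_clientside_auth_params` key sits under `litellm_params` and the model alias is hypothetical:

```python
# Sketch: let clients override api_base for a finetuned Fireworks AI model.
# The alias and the exact key placement are assumptions, not taken from this diff.
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "my-finetuned-llama",  # hypothetical alias
            "litellm_params": {
                "model": "fireworks_ai/accounts/fireworks/models/llama-v3p1-8b-instruct",
                # admin-controlled whitelist of client-overridable auth params
                "configurable_clientside_auth_params": ["api_base"],
            },
        }
    ]
)
```

The hunks below come from the module that defines `embedding()` and `speech()`; they wire up Azure AI and Together AI embeddings and route `/audio/speech` metadata into logging.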
```diff
@@ -83,7 +83,7 @@ from .llms import (
 from .llms.AI21 import completion as ai21
 from .llms.anthropic.chat import AnthropicChatCompletion
 from .llms.anthropic.completion import AnthropicTextCompletion
-from .llms.azure_ai.chat.handler import AzureAIChatCompletion
+from .llms.azure_ai import AzureAIChatCompletion, AzureAIEmbedding
 from .llms.azure_text import AzureTextCompletion
 from .llms.AzureOpenAI.audio_transcriptions import AzureAudioTranscription
 from .llms.AzureOpenAI.azure import AzureChatCompletion, _check_dynamic_azure_params
@@ -168,6 +168,7 @@ openai_o1_chat_completions = OpenAIO1ChatCompletion()
 openai_audio_transcriptions = OpenAIAudioTranscription()
 databricks_chat_completions = DatabricksChatCompletion()
 azure_ai_chat_completions = AzureAIChatCompletion()
+azure_ai_embedding = AzureAIEmbedding()
 anthropic_chat_completions = AnthropicChatCompletion()
 anthropic_text_completions = AnthropicTextCompletion()
 azure_chat_completions = AzureChatCompletion()
```
```diff
@@ -3215,6 +3216,8 @@ async def aembedding(*args, **kwargs) -> EmbeddingResponse:
         or custom_llm_provider == "cohere"
         or custom_llm_provider == "huggingface"
         or custom_llm_provider == "bedrock"
+        or custom_llm_provider == "azure_ai"
+        or custom_llm_provider == "together_ai"
     ):  # currently implemented aiohttp calls for just azure and openai, soon all.
         # Await normally
         init_response = await loop.run_in_executor(None, func_with_context)
```
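Adding `azure_ai` and `together_ai` to this condition sends their async embedding calls down the same executor path as the other listed providers. A quick sketch of an async call, assuming the model name is illustrative and the provider's API key is set in the environment:

```python
import asyncio

import litellm


async def main() -> None:
    # Both new providers now take the executor path shown above.
    response = await litellm.aembedding(
        model="together_ai/togethercomputer/m2-bert-80M-8k-retrieval",  # illustrative
        input=["hello world"],
    )
    print(len(response.data[0]["embedding"]))


asyncio.run(main())
```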
```diff
@@ -3385,6 +3388,9 @@ def embedding(
         api_base=api_base,
         api_key=api_key,
     )
+    if dynamic_api_key is not None:
+        api_key = dynamic_api_key
+
     optional_params = get_optional_params_embeddings(
         model=model,
         user=user,
```
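The new `dynamic_api_key` check means that when `get_llm_provider` resolves a key on the caller's behalf, `embedding()` now honors it. A small sketch of that resolution step, called standalone purely for illustration:

```python
import litellm

# get_llm_provider returns (model, custom_llm_provider, dynamic_api_key, api_base);
# when dynamic_api_key is not None, embedding() now adopts it as the api_key.
model, provider, dynamic_api_key, api_base = litellm.get_llm_provider(
    model="together_ai/togethercomputer/m2-bert-80M-8k-retrieval"  # illustrative
)
print(provider, dynamic_api_key is not None)
```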
```diff
@@ -3481,7 +3487,9 @@ def embedding(
             aembedding=aembedding,
         )
     elif (
-        model in litellm.open_ai_embedding_models or custom_llm_provider == "openai"
+        model in litellm.open_ai_embedding_models
+        or custom_llm_provider == "openai"
+        or custom_llm_provider == "together_ai"
     ):
         api_base = (
             api_base
```
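With `together_ai` folded into the OpenAI-compatible branch, a plain synchronous embedding call works too. A sketch, assuming the model name is illustrative and `TOGETHERAI_API_KEY` is exported:

```python
import litellm

# Synchronous Together AI embedding via the OpenAI-compatible branch above.
response = litellm.embedding(
    model="together_ai/togethercomputer/m2-bert-80M-8k-retrieval",  # illustrative
    input=["first sentence", "second sentence"],
)
print(response.usage)
```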
```diff
@@ -3832,6 +3840,33 @@ def embedding(
             model_response=EmbeddingResponse(),
             aembedding=aembedding,
         )
+    elif custom_llm_provider == "azure_ai":
+        api_base = (
+            api_base  # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there
+            or litellm.api_base
+            or get_secret("AZURE_AI_API_BASE")
+        )
+        # set API KEY
+        api_key = (
+            api_key
+            or litellm.api_key  # for deepinfra/perplexity/anyscale/friendliai we check in get_llm_provider and pass in the api key from there
+            or litellm.openai_key
+            or get_secret("AZURE_AI_API_KEY")
+        )
+
+        ## EMBEDDING CALL
+        response = azure_ai_embedding.embedding(
+            model=model,
+            input=input,
+            api_base=api_base,
+            api_key=api_key,
+            logging_obj=logging,
+            timeout=timeout,
+            model_response=EmbeddingResponse(),
+            optional_params=optional_params,
+            client=client,
+            aembedding=aembedding,
+        )
     else:
         args = locals()
         raise ValueError(f"No valid embedding model args passed in - {args}")
```
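The `azure_ai` branch resolves the base URL and key in order: explicit argument, then the `litellm` module-level setting, then the `AZURE_AI_API_BASE` / `AZURE_AI_API_KEY` secrets, before delegating to `AzureAIEmbedding`. A usage sketch, with placeholder endpoint, key, and deployment name:

```python
import os

import litellm

# Placeholders; the fallback order above picks these up last.
os.environ["AZURE_AI_API_BASE"] = "https://example-endpoint.eastus2.inference.ai.azure.com"
os.environ["AZURE_AI_API_KEY"] = "my-azure-ai-key"

response = litellm.embedding(
    model="azure_ai/Cohere-embed-v3-english",  # illustrative deployment
    input=["hello from litellm"],
)
print(response.model, len(response.data))
```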
```diff
@@ -4901,7 +4936,11 @@ def speech(
     aspeech: Optional[bool] = None,
     **kwargs,
 ) -> HttpxBinaryResponseContent:
-
+    user = kwargs.get("user", None)
+    litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
+    proxy_server_request = kwargs.get("proxy_server_request", None)
+    model_info = kwargs.get("model_info", None)
+    metadata = kwargs.get("metadata", {})
     model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base)  # type: ignore
     tags = kwargs.pop("tags", [])
 
@@ -4918,6 +4957,21 @@ def speech(
     max_retries = litellm.num_retries or openai.DEFAULT_MAX_RETRIES
 
     logging_obj = kwargs.get("litellm_logging_obj", None)
+    logging_obj.update_environment_variables(
+        model=model,
+        user=user,
+        optional_params={},
+        litellm_params={
+            "litellm_call_id": litellm_call_id,
+            "proxy_server_request": proxy_server_request,
+            "model_info": model_info,
+            "metadata": metadata,
+            "preset_cache_key": None,
+            "stream_response": {},
+            **kwargs,
+        },
+        custom_llm_provider=custom_llm_provider,
+    )
     response: Optional[HttpxBinaryResponseContent] = None
     if custom_llm_provider == "openai":
         if voice is None or not (isinstance(voice, str)):
```
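Taken together, the two `speech()` hunks pull `user`, `litellm_call_id`, `proxy_server_request`, `model_info`, and `metadata` out of `kwargs` and feed them to `logging_obj.update_environment_variables`, which is what lets `/audio/speech` inputs reach callbacks such as Langfuse. A sketch of a call exercising that path, with illustrative metadata (Langfuse keys assumed to be in the environment):

```python
import litellm

litellm.success_callback = ["langfuse"]  # assumes LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY are set

# metadata now travels through litellm_params into the logging object,
# so the speech input and tags surface in the logging callback.
audio = litellm.speech(
    model="openai/tts-1",
    voice="alloy",
    input="litellm can log speech requests now",
    metadata={"request_tag": "docs-example"},  # illustrative
)
audio.stream_to_file("speech.mp3")
```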