feat(proxy_cli.py): add new 'log_config' cli param (#6352)

* feat(proxy_cli.py): add new 'log_config' cli param

Allows passing a logging configuration file (e.g. logging.conf) to uvicorn on startup

* docs(cli.md): add logging conf to uvicorn cli docs

* fix(get_llm_provider_logic.py): fix default api base for litellm_proxy

Fixes https://github.com/BerriAI/litellm/issues/6332

* feat(openai_like/embedding): Add support for jina ai embeddings

Closes https://github.com/BerriAI/litellm/issues/6337

* docs(deploy.md): update entrypoint.sh filepath post-refactor

Fixes outdated docs

* feat(prometheus.py): emit time_to_first_token metric on prometheus

Closes https://github.com/BerriAI/litellm/issues/6334

* fix(prometheus.py): only emit time to first token metric if stream is True

Enables more accurate time-to-first-token (TTFT) reporting

* test: handle vertex api instability

* fix(get_llm_provider_logic.py): fix import

* fix(openai.py): fix deepinfra default api base

* fix(anthropic/transformation.py): remove anthropic beta header (#6361)
This commit is contained in:
Krish Dholakia 2024-10-21 21:25:58 -07:00 committed by GitHub
parent 7338b24a74
commit 2b9db05e08
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 839 additions and 263 deletions

View file

@ -121,6 +121,7 @@ from .llms.huggingface_restapi import Huggingface
from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
from .llms.OpenAI.chat.o1_handler import OpenAIO1ChatCompletion
from .llms.OpenAI.openai import OpenAIChatCompletion, OpenAITextCompletion
from .llms.openai_like.embedding.handler import OpenAILikeEmbeddingHandler
from .llms.predibase import PredibaseChatCompletion
from .llms.prompt_templates.common_utils import get_completion_messages
from .llms.prompt_templates.factory import (
@ -220,6 +221,7 @@ vertex_partner_models_chat_completion = VertexAIPartnerModels()
vertex_text_to_speech = VertexTextToSpeechAPI()
watsonxai = IBMWatsonXAI()
sagemaker_llm = SagemakerLLM()
openai_like_embedding = OpenAILikeEmbeddingHandler()
####### COMPLETION ENDPOINTS ################
@ -3129,6 +3131,7 @@ async def aembedding(*args, **kwargs) -> EmbeddingResponse:
or custom_llm_provider == "bedrock"
or custom_llm_provider == "azure_ai"
or custom_llm_provider == "together_ai"
or custom_llm_provider == "openai_like"
): # currently implemented aiohttp calls for just azure and openai, soon all.
# Await normally
init_response = await loop.run_in_executor(None, func_with_context)
@ -3477,6 +3480,32 @@ def embedding( # noqa: PLR0915
client=client,
aembedding=aembedding,
)
elif custom_llm_provider == "openai_like":
api_base = (
api_base or litellm.api_base or get_secret_str("OPENAI_LIKE_API_BASE")
)
# set API KEY
api_key = (
api_key
or litellm.api_key
or litellm.openai_like_key
or get_secret_str("OPENAI_LIKE_API_KEY")
)
## EMBEDDING CALL
response = openai_like_embedding.embedding(
model=model,
input=input,
api_base=api_base,
api_key=api_key,
logging_obj=logging,
timeout=timeout,
model_response=EmbeddingResponse(),
optional_params=optional_params,
client=client,
aembedding=aembedding,
)
elif custom_llm_provider == "cohere" or custom_llm_provider == "cohere_chat":
cohere_key = (
api_key