(perf) use aiohttp for custom_openai (#7514)

* use aiohttp handler

* BaseLLMAIOHTTPHandler

* use CustomOpenAIChatConfig

* CustomOpenAIChatConfig

* CustomOpenAIChatConfig

* fix linting

* AiohttpOpenAIChatConfig

* fix order

* aiohttp_openai
This commit is contained in:
Ishaan Jaff 2025-01-02 22:15:17 -08:00 committed by GitHub
parent 2d57581307
commit 3a454ee2ce
8 changed files with 519 additions and 29 deletions

View file

@ -115,6 +115,7 @@ from .llms.bedrock.embed.embedding import BedrockEmbedding
from .llms.bedrock.image.image_handler import BedrockImageGeneration
from .llms.codestral.completion.handler import CodestralTextCompletion
from .llms.cohere.embed import handler as cohere_embed
from .llms.custom_httpx.aiohttp_handler import BaseLLMAIOHTTPHandler
from .llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
from .llms.custom_llm import CustomLLM, custom_chat_llm_router
from .llms.databricks.chat.handler import DatabricksChatCompletion
@ -217,6 +218,7 @@ openai_like_embedding = OpenAILikeEmbeddingHandler()
openai_like_chat_completion = OpenAILikeChatHandler()
databricks_embedding = DatabricksEmbeddingHandler()
base_llm_http_handler = BaseLLMHTTPHandler()
base_llm_aiohttp_handler = BaseLLMAIOHTTPHandler()
sagemaker_chat_completion = SagemakerChatHandler()
####### COMPLETION ENDPOINTS ################
@ -474,6 +476,7 @@ async def acompletion(
or custom_llm_provider == "clarifai"
or custom_llm_provider == "watsonx"
or custom_llm_provider == "cloudflare"
or custom_llm_provider == "aiohttp_openai"
or custom_llm_provider in litellm.openai_compatible_providers
or custom_llm_provider in litellm._custom_providers
): # aiohttp calls are currently implemented for just azure, openai, hf, ollama, and vertex ai; soon for all providers.
@ -2851,6 +2854,42 @@ def completion( # type: ignore # noqa: PLR0915
)
return response
response = model_response
elif custom_llm_provider == "aiohttp_openai":
api_base = (
api_base # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there
or litellm.api_base
or get_secret("OPENAI_API_BASE")
or "https://api.openai.com/v1"
)
# set API KEY
api_key = (
api_key
or litellm.api_key # for deepinfra/perplexity/anyscale/friendliai we check in get_llm_provider and pass in the api key from there
or litellm.openai_key
or get_secret("OPENAI_API_KEY")
)
headers = headers or litellm.headers
if extra_headers is not None:
optional_params["extra_headers"] = extra_headers
response = base_llm_aiohttp_handler.completion(
model=model,
messages=messages,
headers=headers,
model_response=model_response,
api_key=api_key,
api_base=api_base,
acompletion=acompletion,
logging_obj=logging,
optional_params=optional_params,
litellm_params=litellm_params,
timeout=timeout,
client=client,
custom_llm_provider=custom_llm_provider,
encoding=encoding,
stream=stream,
)
elif custom_llm_provider == "custom":
url = litellm.api_base or api_base or ""
if url is None or url == "":