Fix langfuse prompt management on proxy (#7535)

* fix(types/utils.py): support langfuse + humanloop routes on llm router

* fix(main.py): remove acompletion elif block

just await if coroutine returned
This commit is contained in:
Krish Dholakia 2025-01-03 12:42:37 -08:00 committed by GitHub
parent 24589c49af
commit f6698e871f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 28 additions and 67 deletions

View file

@@ -435,74 +435,26 @@ async def acompletion(
ctx = contextvars.copy_context() ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func) func_with_context = partial(ctx.run, func)
if ( init_response = await loop.run_in_executor(None, func_with_context)
custom_llm_provider == "openai" if isinstance(init_response, dict) or isinstance(
or custom_llm_provider == "azure" init_response, ModelResponse
or custom_llm_provider == "azure_text" ): ## CACHING SCENARIO
or custom_llm_provider == "custom_openai" if isinstance(init_response, dict):
or custom_llm_provider == "anyscale" response = ModelResponse(**init_response)
or custom_llm_provider == "mistral" response = init_response
or custom_llm_provider == "openrouter" elif asyncio.iscoroutine(init_response):
or custom_llm_provider == "deepinfra" response = await init_response
or custom_llm_provider == "perplexity"
or custom_llm_provider == "groq"
or custom_llm_provider == "nvidia_nim"
or custom_llm_provider == "cohere_chat"
or custom_llm_provider == "cohere"
or custom_llm_provider == "cerebras"
or custom_llm_provider == "sambanova"
or custom_llm_provider == "ai21_chat"
or custom_llm_provider == "ai21"
or custom_llm_provider == "volcengine"
or custom_llm_provider == "codestral"
or custom_llm_provider == "text-completion-codestral"
or custom_llm_provider == "deepseek"
or custom_llm_provider == "text-completion-openai"
or custom_llm_provider == "huggingface"
or custom_llm_provider == "ollama"
or custom_llm_provider == "ollama_chat"
or custom_llm_provider == "replicate"
or custom_llm_provider == "vertex_ai"
or custom_llm_provider == "vertex_ai_beta"
or custom_llm_provider == "gemini"
or custom_llm_provider == "sagemaker"
or custom_llm_provider == "sagemaker_chat"
or custom_llm_provider == "anthropic"
or custom_llm_provider == "anthropic_text"
or custom_llm_provider == "predibase"
or custom_llm_provider == "bedrock"
or custom_llm_provider == "databricks"
or custom_llm_provider == "triton"
or custom_llm_provider == "clarifai"
or custom_llm_provider == "watsonx"
or custom_llm_provider == "cloudflare"
or custom_llm_provider == "aiohttp_openai"
or custom_llm_provider in litellm.openai_compatible_providers
or custom_llm_provider in litellm._custom_providers
): # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all.
init_response = await loop.run_in_executor(None, func_with_context)
if isinstance(init_response, dict) or isinstance(
init_response, ModelResponse
): ## CACHING SCENARIO
if isinstance(init_response, dict):
response = ModelResponse(**init_response)
response = init_response
elif asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response # type: ignore
if (
custom_llm_provider == "text-completion-openai"
or custom_llm_provider == "text-completion-codestral"
) and isinstance(response, TextCompletionResponse):
response = litellm.OpenAITextCompletionConfig().convert_to_chat_model_response_object(
response_object=response,
model_response_object=litellm.ModelResponse(),
)
else: else:
# Call the synchronous function using run_in_executor response = init_response # type: ignore
response = await loop.run_in_executor(None, func_with_context) # type: ignore
if (
custom_llm_provider == "text-completion-openai"
or custom_llm_provider == "text-completion-codestral"
) and isinstance(response, TextCompletionResponse):
response = litellm.OpenAITextCompletionConfig().convert_to_chat_model_response_object(
response_object=response,
model_response_object=litellm.ModelResponse(),
)
if isinstance(response, CustomStreamWrapper): if isinstance(response, CustomStreamWrapper):
response.set_logging_event_loop( response.set_logging_event_loop(
loop=loop loop=loop

View file

@@ -3,6 +3,13 @@ model_list:
litellm_params: litellm_params:
model: openai/gpt-3.5-turbo model: openai/gpt-3.5-turbo
api_key: os.environ/OPENAI_API_KEY api_key: os.environ/OPENAI_API_KEY
- model_name: chatbot_actions
litellm_params:
model: langfuse/azure/gpt-4o
api_base: "os.environ/AZURE_API_BASE"
api_key: "os.environ/AZURE_API_KEY"
tpm: 1000000
prompt_id: "jokes"
litellm_settings: litellm_settings:
default_team_settings: default_team_settings:

View file

@@ -1800,6 +1800,8 @@ class LlmProviders(str, Enum):
INFINITY = "infinity" INFINITY = "infinity"
DEEPGRAM = "deepgram" DEEPGRAM = "deepgram"
AIOHTTP_OPENAI = "aiohttp_openai" AIOHTTP_OPENAI = "aiohttp_openai"
LANGFUSE = "langfuse"
HUMANLOOP = "humanloop"
class LiteLLMLoggingBaseClass: class LiteLLMLoggingBaseClass: