Litellm dev 01 06 2025 p1 (#7594)

* fix(custom_logger.py): expose new 'async_get_chat_completion_prompt' event hook

* fix(custom_logger.py, langfuse_prompt_management.py): remove 'headers' from custom logger 'async_get_chat_completion_prompt' and 'get_chat_completion_prompt' event hooks

* feat(router.py): expose new function for prompt management based routing

* feat(router.py): partial working router prompt factory logic

allows load balanced model to be used for model name w/ langfuse prompt management call

* feat(router.py): fix prompt management with load balanced model group

* feat(langfuse_prompt_management.py): support reading in openai params from langfuse

enables user to define optional params on langfuse vs. client code

* test(test_Router.py): add unit test for router based langfuse prompt management

* fix: fix linting errors
This commit is contained in:
Krish Dholakia 2025-01-06 21:26:21 -08:00 committed by GitHub
parent 7133cf5b74
commit fef7839e8a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 214 additions and 90 deletions

View file

@@ -2700,3 +2700,40 @@ def test_router_completion_with_model_id():
) as mock_pre_call_checks:
router.completion(model="123", messages=[{"role": "user", "content": "hi"}])
mock_pre_call_checks.assert_not_called()
def test_router_prompt_management_factory():
    """Unit test for router-based langfuse prompt management.

    Builds a Router with three deployments — a plain model, a
    langfuse-backed deployment ('chatbot_actions') carrying a
    'prompt_id', and an OpenAI deployment — then checks that
    _is_prompt_management_model classifies them correctly and that
    _prompt_management_factory can be invoked for the langfuse one.
    """
    plain_deployment = {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "gpt-3.5-turbo"},
    }
    # NOTE(review): presumably the 'langfuse/' prefix plus 'prompt_id' is
    # what marks this deployment as prompt-management backed — confirm
    # against Router._is_prompt_management_model.
    prompt_management_deployment = {
        "model_name": "chatbot_actions",
        "litellm_params": {
            "model": "langfuse/openai-gpt-3.5-turbo",
            "tpm": 1000000,
            "prompt_id": "jokes",
        },
    }
    openai_deployment = {
        "model_name": "openai-gpt-3.5-turbo",
        "litellm_params": {
            "model": "openai/gpt-3.5-turbo",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    }

    router = Router(
        model_list=[
            plain_deployment,
            prompt_management_deployment,
            openai_deployment,
        ]
    )

    assert router._is_prompt_management_model("chatbot_actions") is True
    assert router._is_prompt_management_model("openai-gpt-3.5-turbo") is False

    response = router._prompt_management_factory(
        model="chatbot_actions",
        messages=[{"role": "user", "content": "Hello world!"}],
        kwargs={},
    )
    print(response)