build: merge litellm_dev_03_01_2025_p2

Krrish Dholakia 2025-03-03 23:05:41 -08:00
parent 87dd195b51
commit 8ea3d4c046
11 changed files with 124 additions and 24 deletions

View file

@@ -40,6 +40,7 @@ in_memory_dynamic_logger_cache = DynamicLoggingCache()
 def langfuse_client_init(
     langfuse_public_key=None,
     langfuse_secret=None,
+    langfuse_secret_key=None,
     langfuse_host=None,
     flush_interval=1,
 ) -> LangfuseClass:
@@ -67,7 +68,10 @@ def langfuse_client_init(
     )

     # Instance variables
-    secret_key = langfuse_secret or os.getenv("LANGFUSE_SECRET_KEY")
+    secret_key = (
+        langfuse_secret or langfuse_secret_key or os.getenv("LANGFUSE_SECRET_KEY")
+    )
     public_key = langfuse_public_key or os.getenv("LANGFUSE_PUBLIC_KEY")
     langfuse_host = langfuse_host or os.getenv(
         "LANGFUSE_HOST", "https://cloud.langfuse.com"
@@ -190,6 +194,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
         langfuse_client = langfuse_client_init(
             langfuse_public_key=dynamic_callback_params.get("langfuse_public_key"),
             langfuse_secret=dynamic_callback_params.get("langfuse_secret"),
+            langfuse_secret_key=dynamic_callback_params.get("langfuse_secret_key"),
             langfuse_host=dynamic_callback_params.get("langfuse_host"),
         )
         langfuse_prompt_client = self._get_prompt_from_id(
@@ -206,6 +211,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
         langfuse_client = langfuse_client_init(
             langfuse_public_key=dynamic_callback_params.get("langfuse_public_key"),
             langfuse_secret=dynamic_callback_params.get("langfuse_secret"),
+            langfuse_secret_key=dynamic_callback_params.get("langfuse_secret_key"),
            langfuse_host=dynamic_callback_params.get("langfuse_host"),
        )
        langfuse_prompt_client = self._get_prompt_from_id(
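With this change the prompt-management path accepts either `langfuse_secret` or `langfuse_secret_key` (falling back to the `LANGFUSE_SECRET_KEY` env var), matching the alias most callers use for per-request credentials. A minimal sketch of how a caller might exercise it, assuming litellm's documented `langfuse/` prompt-management prefix and `prompt_id` kwarg; the key values are placeholders:

import litellm

# per-request Langfuse credentials; the *_secret_key alias now resolves too
response = litellm.completion(
    model="langfuse/openai-model",
    prompt_id="my-prompt",  # Langfuse-managed prompt to pull
    messages=[{"role": "user", "content": "hi"}],
    langfuse_public_key="pk-lf-...",
    langfuse_secret_key="sk-lf-...",
)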

File diff suppressed because one or more lines are too long

View file

@@ -1,11 +1,9 @@
 model_list:
-  - model_name: openai-gpt-4o-mini-2024-07-18
-    litellm_params:
-      model: openai/gpt-4o-mini-2024-07-18
-      configurable_clientside_auth_params: ["api_key"]
-      api_key: "my-bad-key"
-  # - model_name: openai-fallback-model
-  #   litellm_params:
-  #     model: openai/gpt-3.5-turbo
+  - model_name: my-langfuse-model
+    litellm_params:
+      model: langfuse/openai-model
+      api_key: os.environ/OPENAI_API_KEY
+  - model_name: openai-model
+    litellm_params:
+      model: openai/gpt-3.5-turbo
+      api_key: os.environ/OPENAI_API_KEY
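The first entry points the proxy at a Langfuse-managed prompt model. A request against it might look like the following sketch, assuming the proxy is running locally on the default port and "sk-1234" stands in for a real proxy key:

import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

response = client.chat.completions.create(
    model="my-langfuse-model",
    messages=[{"role": "user", "content": "hi"}],
)
print(response.choices[0].message.content)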

View file

@@ -1003,6 +1003,7 @@ class AddTeamCallback(LiteLLMPydanticObjectBase):
 class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
     success_callback: Optional[List[str]] = []
     failure_callback: Optional[List[str]] = []
+    callbacks: Optional[List[str]] = []
     # for now - only supported for langfuse
     callback_vars: Optional[Dict[str, str]] = {}
@@ -1015,6 +1016,9 @@ class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
         failure_callback = values.get("failure_callback", [])
         if failure_callback is None:
             values.pop("failure_callback", None)
+        callbacks = values.get("callbacks", [])
+        if callbacks is None:
+            values.pop("callbacks", None)

         callback_vars = values.get("callback_vars", {})
         if callback_vars is None:
@@ -1023,6 +1027,7 @@ class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
             return {
                 "success_callback": [],
                 "failure_callback": [],
+                "callbacks": [],
                 "callback_vars": {},
             }
         valid_keys = set(StandardCallbackDynamicParams.__annotations__.keys())
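The new `callbacks` field mirrors `success_callback`/`failure_callback` and gets the same None-scrubbing in the validator, so a None value falls back to the empty-list default. A quick illustration, assuming the class is importable from litellm.proxy._types as in this repo; values are placeholders:

from litellm.proxy._types import TeamCallbackMetadata

# None values are popped by the validator, so the [] / {} defaults apply
meta = TeamCallbackMetadata(
    success_callback=None,
    failure_callback=None,
    callbacks=None,
    callback_vars={"langfuse_secret_key": "sk-lf-..."},
)
assert meta.callbacks == []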

View file

@@ -102,11 +102,15 @@ def convert_key_logging_metadata_to_callback(
         if data.callback_name not in team_callback_settings_obj.failure_callback:
             team_callback_settings_obj.failure_callback.append(data.callback_name)
-    elif data.callback_type == "success_and_failure":
+    elif (
+        not data.callback_type or data.callback_type == "success_and_failure"
+    ):  # assume 'success_and_failure' = litellm.callbacks
         if team_callback_settings_obj.success_callback is None:
             team_callback_settings_obj.success_callback = []
         if team_callback_settings_obj.failure_callback is None:
             team_callback_settings_obj.failure_callback = []
+        if team_callback_settings_obj.callbacks is None:
+            team_callback_settings_obj.callbacks = []

         if data.callback_name not in team_callback_settings_obj.success_callback:
             team_callback_settings_obj.success_callback.append(data.callback_name)
@@ -114,6 +118,9 @@ def convert_key_logging_metadata_to_callback(
         if data.callback_name not in team_callback_settings_obj.failure_callback:
             team_callback_settings_obj.failure_callback.append(data.callback_name)
+        if data.callback_name not in team_callback_settings_obj.callbacks:
+            team_callback_settings_obj.callbacks.append(data.callback_name)

     for var, value in data.callback_vars.items():
         if team_callback_settings_obj.callback_vars is None:
             team_callback_settings_obj.callback_vars = {}
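In effect, a key/team logging entry with no `callback_type` is now treated like "success_and_failure", and the callback name is also recorded in the new `callbacks` list. A rough illustration of the expected mapping, using a hypothetical input (field names taken from the AddTeamCallback usage above):

from litellm.proxy._types import AddTeamCallback

data = AddTeamCallback(
    callback_name="langfuse",
    callback_type=None,  # unset -> handled like "success_and_failure"
    callback_vars={"langfuse_public_key": "pk-lf-..."},
)
# after convert_key_logging_metadata_to_callback(data, team_callback_settings_obj):
#   team_callback_settings_obj.success_callback == ["langfuse"]
#   team_callback_settings_obj.failure_callback == ["langfuse"]
#   team_callback_settings_obj.callbacks        == ["langfuse"]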

View file

@@ -1750,6 +1750,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "prompt"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -1863,6 +1864,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "prompt"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -1961,6 +1963,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "prompt"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_before_fallbacks(model=model, kwargs=kwargs)
             data = deployment["litellm_params"].copy()
@@ -2036,6 +2039,7 @@ class Router:
             deployment = await self.async_get_available_deployment(
                 model=model,
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
             data = deployment["litellm_params"].copy()
@@ -2080,6 +2084,7 @@ class Router:
                 model=model,
                 messages=messages,
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             data = deployment["litellm_params"].copy()
@@ -2185,6 +2190,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": prompt}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -2283,6 +2289,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "default text"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -2452,6 +2459,7 @@ class Router:
                 model=model,
                 input=input,
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
             data = deployment["litellm_params"].copy()
@@ -2549,6 +2557,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "files-api-fake-text"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -2654,6 +2663,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "files-api-fake-text"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             metadata_variable_name = _get_router_metadata_variable_name(
                 function_name="_acreate_batch"
@@ -2850,7 +2860,8 @@ class Router:
     ):
         if kwargs.get("model") and self.get_model_list(model_name=kwargs["model"]):
             deployment = await self.async_get_available_deployment(
-                model=kwargs["model"]
+                model=kwargs["model"],
+                request_kwargs=kwargs,
             )
             kwargs["model"] = deployment["litellm_params"]["model"]
         return await original_function(**kwargs)
@@ -5590,10 +5601,10 @@ class Router:
     async def async_get_available_deployment(
         self,
         model: str,
+        request_kwargs: Dict,
         messages: Optional[List[Dict[str, str]]] = None,
         input: Optional[Union[str, List]] = None,
         specific_deployment: Optional[bool] = False,
-        request_kwargs: Optional[Dict] = None,
     ):
         """
         Async implementation of 'get_available_deployments'.
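Since `request_kwargs` is now a required parameter, every caller of `async_get_available_deployment` has to pass it explicitly (an empty dict if there is nothing to forward), which is why the test updates below add `request_kwargs={}`. A minimal sketch of an external caller, assuming a single mocked deployment so the call stays offline:

import asyncio
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-4",
            "litellm_params": {"model": "gpt-4o", "mock_response": "ok"},
        }
    ]
)

async def main():
    # request_kwargs is now required; pass {} if you have nothing to forward
    deployment = await router.async_get_available_deployment(
        model="gpt-4",
        messages=[{"role": "user", "content": "hi"}],
        request_kwargs={},
    )
    print(deployment["litellm_params"]["model"])

asyncio.run(main())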

View file

@@ -451,6 +451,15 @@ def get_applied_guardrails(kwargs: Dict[str, Any]) -> List[str]:
     return applied_guardrails


+def get_dynamic_callbacks(
+    dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]]
+) -> List:
+    returned_callbacks = litellm.callbacks.copy()
+    if dynamic_callbacks:
+        returned_callbacks.extend(dynamic_callbacks)  # type: ignore
+    return returned_callbacks
+
+
 def function_setup(  # noqa: PLR0915
     original_function: str, rules_obj, start_time, *args, **kwargs
 ):  # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
@@ -475,12 +484,18 @@ def function_setup(  # noqa: PLR0915
         ## LOGGING SETUP
         function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None

-        if len(litellm.callbacks) > 0:
-            for callback in litellm.callbacks:
+        ## DYNAMIC CALLBACKS ##
+        dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = (
+            kwargs.pop("callbacks", None)
+        )
+        all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks)
+
+        if len(all_callbacks) > 0:
+            for callback in all_callbacks:
                 # check if callback is a string - e.g. "lago", "openmeter"
                 if isinstance(callback, str):
                     callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(  # type: ignore
-                        callback, internal_usage_cache=None, llm_router=None
+                        callback, internal_usage_cache=None, llm_router=None  # type: ignore
                     )
                     if callback is None or any(
                         isinstance(cb, type(callback))
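The practical effect of `get_dynamic_callbacks` is that a `callbacks=[...]` kwarg on an individual request gets merged with the global `litellm.callbacks` list for that call only. A minimal sketch, assuming a CustomLogger subclass and `mock_response` to keep the call offline:

import litellm
from litellm.integrations.custom_logger import CustomLogger

class MyLogger(CustomLogger):
    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        print("request finished:", kwargs.get("model"))

# per-request callback, merged with litellm.callbacks for this call only
response = litellm.completion(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "hi"}],
    mock_response="hello",
    callbacks=[MyLogger()],
)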

View file

@@ -119,7 +119,7 @@ async def test_router_get_available_deployments(async_test):
     if async_test is True:
         await router.cache.async_set_cache(key=cache_key, value=request_count_dict)
         deployment = await router.async_get_available_deployment(
-            model=model_group, messages=None
+            model=model_group, messages=None, request_kwargs={}
         )
     else:
         router.cache.set_cache(key=cache_key, value=request_count_dict)

View file

@@ -569,7 +569,7 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
     # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
     for _ in range(1000):
         selected_model = await router.async_get_available_deployment(
-            "gpt-3.5-turbo"
+            "gpt-3.5-turbo", request_kwargs={}
         )
         selected_model_id = selected_model["litellm_params"]["model"]
         selected_model_name = selected_model_id

View file

@@ -26,11 +26,6 @@ import litellm
 from litellm import Router
 from litellm._logging import verbose_logger

-verbose_logger.setLevel(logging.DEBUG)
-
-load_dotenv()
-

 @pytest.mark.asyncio()
 async def test_router_free_paid_tier():
@@ -93,6 +88,69 @@ async def test_router_free_paid_tier():
     assert response_extra_info["model_id"] == "very-expensive-model"


+@pytest.mark.asyncio()
+async def test_router_free_paid_tier_embeddings():
+    """
+    Pass list of orgs in 1 model definition,
+    expect a unique deployment for each to be created
+    """
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["free"],
+                    "mock_response": ["1", "2", "3"],
+                },
+                "model_info": {"id": "very-cheap-model"},
+            },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o-mini",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["paid"],
+                    "mock_response": ["1", "2", "3"],
+                },
+                "model_info": {"id": "very-expensive-model"},
+            },
+        ],
+        enable_tag_filtering=True,
+    )
+
+    for _ in range(1):
+        # this should pick model with id == very-cheap-model
+        response = await router.aembedding(
+            model="gpt-4",
+            input="Tell me a joke.",
+            metadata={"tags": ["free"]},
+        )
+        print("Response: ", response)
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+        assert response_extra_info["model_id"] == "very-cheap-model"
+
+    for _ in range(5):
+        # this should pick model with id == very-expensive-model
+        response = await router.aembedding(
+            model="gpt-4",
+            input="Tell me a joke.",
+            metadata={"tags": ["paid"]},
+        )
+        print("Response: ", response)
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+        assert response_extra_info["model_id"] == "very-expensive-model"
+
+
 @pytest.mark.asyncio()
 async def test_default_tagged_deployments():
     """

View file

@@ -377,6 +377,7 @@ async def test_multiple_potential_deployments(sync_mode):
         deployment = await router.async_get_available_deployment(
             model="azure-model",
             messages=[{"role": "user", "content": "Hey, how's it going?"}],
+            request_kwargs={},
         )

         ## get id ##