diff --git a/litellm/integrations/langfuse/langfuse_prompt_management.py b/litellm/integrations/langfuse/langfuse_prompt_management.py
index cc2a6cf80d..1f4ca84db3 100644
--- a/litellm/integrations/langfuse/langfuse_prompt_management.py
+++ b/litellm/integrations/langfuse/langfuse_prompt_management.py
@@ -40,6 +40,7 @@ in_memory_dynamic_logger_cache = DynamicLoggingCache()
def langfuse_client_init(
langfuse_public_key=None,
langfuse_secret=None,
+ langfuse_secret_key=None,
langfuse_host=None,
flush_interval=1,
) -> LangfuseClass:
@@ -67,7 +68,10 @@ def langfuse_client_init(
)
# Instance variables
- secret_key = langfuse_secret or os.getenv("LANGFUSE_SECRET_KEY")
+
+ secret_key = (
+ langfuse_secret or langfuse_secret_key or os.getenv("LANGFUSE_SECRET_KEY")
+ )
public_key = langfuse_public_key or os.getenv("LANGFUSE_PUBLIC_KEY")
langfuse_host = langfuse_host or os.getenv(
"LANGFUSE_HOST", "https://cloud.langfuse.com"
@@ -190,6 +194,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
langfuse_client = langfuse_client_init(
langfuse_public_key=dynamic_callback_params.get("langfuse_public_key"),
langfuse_secret=dynamic_callback_params.get("langfuse_secret"),
+ langfuse_secret_key=dynamic_callback_params.get("langfuse_secret_key"),
langfuse_host=dynamic_callback_params.get("langfuse_host"),
)
langfuse_prompt_client = self._get_prompt_from_id(
@@ -206,6 +211,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
langfuse_client = langfuse_client_init(
langfuse_public_key=dynamic_callback_params.get("langfuse_public_key"),
langfuse_secret=dynamic_callback_params.get("langfuse_secret"),
+ langfuse_secret_key=dynamic_callback_params.get("langfuse_secret_key"),
langfuse_host=dynamic_callback_params.get("langfuse_host"),
)
langfuse_prompt_client = self._get_prompt_from_id(
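Usage sketch (not part of the patch; key values are placeholders): with the new keyword, either name resolves to the same secret.

    from litellm.integrations.langfuse.langfuse_prompt_management import langfuse_client_init

    client = langfuse_client_init(
        langfuse_public_key="pk-lf-...",            # placeholder
        langfuse_secret_key="sk-lf-...",            # new alias, equivalent to langfuse_secret="sk-lf-..."
        langfuse_host="https://cloud.langfuse.com",
    )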
diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html
deleted file mode 100644
index 3b4d5d430f..0000000000
--- a/litellm/proxy/_experimental/out/onboarding.html
+++ /dev/null
@@ -1 +0,0 @@
-LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index ba355c0dc4..c01d2e7751 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,11 +1,9 @@
model_list:
- - model_name: openai-gpt-4o-mini-2024-07-18
+ - model_name: my-langfuse-model
litellm_params:
- model: openai/gpt-4o-mini-2024-07-18
- configurable_clientside_auth_params: ["api_key"]
- api_key: "my-bad-key"
- # - model_name: openai-fallback-model
- # litellm_params:
- # model: openai/gpt-3.5-turbo
-
-
+ model: langfuse/openai-model
+ api_key: os.environ/OPENAI_API_KEY
+ - model_name: openai-model
+ litellm_params:
+ model: openai/gpt-3.5-turbo
+ api_key: os.environ/OPENAI_API_KEY
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 6a427fed8a..ba27de78be 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -1003,6 +1003,7 @@ class AddTeamCallback(LiteLLMPydanticObjectBase):
class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
success_callback: Optional[List[str]] = []
failure_callback: Optional[List[str]] = []
+ callbacks: Optional[List[str]] = []
# for now - only supported for langfuse
callback_vars: Optional[Dict[str, str]] = {}
@@ -1015,6 +1016,9 @@ class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
failure_callback = values.get("failure_callback", [])
if failure_callback is None:
values.pop("failure_callback", None)
+ callbacks = values.get("callbacks", [])
+ if callbacks is None:
+ values.pop("callbacks", None)
callback_vars = values.get("callback_vars", {})
if callback_vars is None:
@@ -1023,6 +1027,7 @@ class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
return {
"success_callback": [],
"failure_callback": [],
+ "callbacks": [],
"callback_vars": {},
}
valid_keys = set(StandardCallbackDynamicParams.__annotations__.keys())
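A minimal sketch of the new field (values are placeholders), assuming callback_vars keys are validated against StandardCallbackDynamicParams as above:

    from litellm.proxy._types import TeamCallbackMetadata

    team_callbacks = TeamCallbackMetadata(
        callbacks=["langfuse"],             # new generic list alongside success/failure
        success_callback=["langfuse"],
        failure_callback=["langfuse"],
        callback_vars={
            "langfuse_public_key": "pk-lf-...",
            "langfuse_secret_key": "sk-lf-...",
        },
    )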
diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index c15bbbd4a7..693e44ac77 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -102,11 +102,15 @@ def convert_key_logging_metadata_to_callback(
if data.callback_name not in team_callback_settings_obj.failure_callback:
team_callback_settings_obj.failure_callback.append(data.callback_name)
- elif data.callback_type == "success_and_failure":
+ elif (
+ not data.callback_type or data.callback_type == "success_and_failure"
+ ): # assume 'success_and_failure' = litellm.callbacks
if team_callback_settings_obj.success_callback is None:
team_callback_settings_obj.success_callback = []
if team_callback_settings_obj.failure_callback is None:
team_callback_settings_obj.failure_callback = []
+ if team_callback_settings_obj.callbacks is None:
+ team_callback_settings_obj.callbacks = []
if data.callback_name not in team_callback_settings_obj.success_callback:
team_callback_settings_obj.success_callback.append(data.callback_name)
@@ -114,6 +118,9 @@ def convert_key_logging_metadata_to_callback(
if data.callback_name not in team_callback_settings_obj.failure_callback:
team_callback_settings_obj.failure_callback.append(data.callback_name)
+ if data.callback_name not in team_callback_settings_obj.callbacks:
+ team_callback_settings_obj.callbacks.append(data.callback_name)
+
for var, value in data.callback_vars.items():
if team_callback_settings_obj.callback_vars is None:
team_callback_settings_obj.callback_vars = {}
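Illustrative sketch only, assuming AddTeamCallback accepts callback_type=None and that the helper returns the updated settings object:

    from litellm.proxy._types import AddTeamCallback, TeamCallbackMetadata
    from litellm.proxy.litellm_pre_call_utils import convert_key_logging_metadata_to_callback

    data = AddTeamCallback(
        callback_name="langfuse",
        callback_type=None,  # treated like "success_and_failure" with this change
        callback_vars={"langfuse_public_key": "pk-lf-...", "langfuse_secret_key": "sk-lf-..."},
    )
    settings = convert_key_logging_metadata_to_callback(
        data=data, team_callback_settings_obj=TeamCallbackMetadata()
    )
    # "langfuse" should now appear in settings.callbacks as well as in
    # settings.success_callback and settings.failure_callback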
diff --git a/litellm/router.py b/litellm/router.py
index 880950b227..8b0bbd6b9f 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -1750,6 +1750,7 @@ class Router:
model=model,
messages=[{"role": "user", "content": "prompt"}],
specific_deployment=kwargs.pop("specific_deployment", None),
+ request_kwargs=kwargs,
)
self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -1863,6 +1864,7 @@ class Router:
model=model,
messages=[{"role": "user", "content": "prompt"}],
specific_deployment=kwargs.pop("specific_deployment", None),
+ request_kwargs=kwargs,
)
self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -1961,6 +1963,7 @@ class Router:
model=model,
messages=[{"role": "user", "content": "prompt"}],
specific_deployment=kwargs.pop("specific_deployment", None),
+ request_kwargs=kwargs,
)
self._update_kwargs_before_fallbacks(model=model, kwargs=kwargs)
data = deployment["litellm_params"].copy()
@@ -2036,6 +2039,7 @@ class Router:
deployment = await self.async_get_available_deployment(
model=model,
specific_deployment=kwargs.pop("specific_deployment", None),
+ request_kwargs=kwargs,
)
self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
data = deployment["litellm_params"].copy()
@@ -2080,6 +2084,7 @@ class Router:
model=model,
messages=messages,
specific_deployment=kwargs.pop("specific_deployment", None),
+ request_kwargs=kwargs,
)
data = deployment["litellm_params"].copy()
@@ -2185,6 +2190,7 @@ class Router:
model=model,
messages=[{"role": "user", "content": prompt}],
specific_deployment=kwargs.pop("specific_deployment", None),
+ request_kwargs=kwargs,
)
self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -2283,6 +2289,7 @@ class Router:
model=model,
messages=[{"role": "user", "content": "default text"}],
specific_deployment=kwargs.pop("specific_deployment", None),
+ request_kwargs=kwargs,
)
self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -2452,6 +2459,7 @@ class Router:
model=model,
input=input,
specific_deployment=kwargs.pop("specific_deployment", None),
+ request_kwargs=kwargs,
)
self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
data = deployment["litellm_params"].copy()
@@ -2549,6 +2557,7 @@ class Router:
model=model,
messages=[{"role": "user", "content": "files-api-fake-text"}],
specific_deployment=kwargs.pop("specific_deployment", None),
+ request_kwargs=kwargs,
)
self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -2654,6 +2663,7 @@ class Router:
model=model,
messages=[{"role": "user", "content": "files-api-fake-text"}],
specific_deployment=kwargs.pop("specific_deployment", None),
+ request_kwargs=kwargs,
)
metadata_variable_name = _get_router_metadata_variable_name(
function_name="_acreate_batch"
@@ -2850,7 +2860,8 @@ class Router:
):
if kwargs.get("model") and self.get_model_list(model_name=kwargs["model"]):
deployment = await self.async_get_available_deployment(
- model=kwargs["model"]
+ model=kwargs["model"],
+ request_kwargs=kwargs,
)
kwargs["model"] = deployment["litellm_params"]["model"]
return await original_function(**kwargs)
@@ -5590,10 +5601,10 @@ class Router:
async def async_get_available_deployment(
self,
model: str,
+ request_kwargs: Dict,
messages: Optional[List[Dict[str, str]]] = None,
input: Optional[Union[str, List]] = None,
specific_deployment: Optional[bool] = False,
- request_kwargs: Optional[Dict] = None,
):
"""
Async implementation of 'get_available_deployments'.
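Callers now pass the request kwargs explicitly; a minimal sketch (model name and kwargs are placeholders):

    deployment = await router.async_get_available_deployment(
        model="gpt-4",
        messages=[{"role": "user", "content": "hello"}],
        request_kwargs={},  # previously optional, now required
    )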
diff --git a/litellm/utils.py b/litellm/utils.py
index 495b0d45a6..cbd5e2d0d3 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -451,6 +451,15 @@ def get_applied_guardrails(kwargs: Dict[str, Any]) -> List[str]:
return applied_guardrails
+def get_dynamic_callbacks(
+ dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]]
+) -> List:
+ returned_callbacks = litellm.callbacks.copy()
+ if dynamic_callbacks:
+ returned_callbacks.extend(dynamic_callbacks) # type: ignore
+ return returned_callbacks
+
+
def function_setup( # noqa: PLR0915
original_function: str, rules_obj, start_time, *args, **kwargs
): # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
@@ -475,12 +484,18 @@ def function_setup( # noqa: PLR0915
## LOGGING SETUP
function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None
- if len(litellm.callbacks) > 0:
- for callback in litellm.callbacks:
+ ## DYNAMIC CALLBACKS ##
+ dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = (
+ kwargs.pop("callbacks", None)
+ )
+ all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks)
+
+ if len(all_callbacks) > 0:
+ for callback in all_callbacks:
# check if callback is a string - e.g. "lago", "openmeter"
if isinstance(callback, str):
callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class( # type: ignore
- callback, internal_usage_cache=None, llm_router=None
+ callback, internal_usage_cache=None, llm_router=None # type: ignore
)
if callback is None or any(
isinstance(cb, type(callback))
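Sketch of the per-request pattern this enables, assuming "langfuse" is a registered string callback and its credentials are set via environment variables; the request-level list is merged with litellm.callbacks rather than replacing it:

    import litellm

    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        callbacks=["langfuse"],  # popped in function_setup and merged via get_dynamic_callbacks
    )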
diff --git a/tests/local_testing/test_least_busy_routing.py b/tests/local_testing/test_least_busy_routing.py
index c9c6eb6093..cf69f596d9 100644
--- a/tests/local_testing/test_least_busy_routing.py
+++ b/tests/local_testing/test_least_busy_routing.py
@@ -119,7 +119,7 @@ async def test_router_get_available_deployments(async_test):
if async_test is True:
await router.cache.async_set_cache(key=cache_key, value=request_count_dict)
deployment = await router.async_get_available_deployment(
- model=model_group, messages=None
+ model=model_group, messages=None, request_kwargs={}
)
else:
router.cache.set_cache(key=cache_key, value=request_count_dict)
diff --git a/tests/local_testing/test_router_get_deployments.py b/tests/local_testing/test_router_get_deployments.py
index d57ef0b81d..efbb5d16e7 100644
--- a/tests/local_testing/test_router_get_deployments.py
+++ b/tests/local_testing/test_router_get_deployments.py
@@ -569,7 +569,7 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
for _ in range(1000):
selected_model = await router.async_get_available_deployment(
- "gpt-3.5-turbo"
+ "gpt-3.5-turbo", request_kwargs={}
)
selected_model_id = selected_model["litellm_params"]["model"]
selected_model_name = selected_model_id
diff --git a/tests/local_testing/test_router_tag_routing.py b/tests/local_testing/test_router_tag_routing.py
index 47f2a0a8f9..4e30e1d8b6 100644
--- a/tests/local_testing/test_router_tag_routing.py
+++ b/tests/local_testing/test_router_tag_routing.py
@@ -26,11 +26,6 @@ import litellm
from litellm import Router
from litellm._logging import verbose_logger
-verbose_logger.setLevel(logging.DEBUG)
-
-
-load_dotenv()
-
@pytest.mark.asyncio()
async def test_router_free_paid_tier():
@@ -93,6 +88,69 @@ async def test_router_free_paid_tier():
assert response_extra_info["model_id"] == "very-expensive-model"
+@pytest.mark.asyncio()
+async def test_router_free_paid_tier_embeddings():
+ """
+    Test tag-based routing for embeddings: requests tagged 'free' should route to
+    the cheap deployment, requests tagged 'paid' to the expensive deployment
+ """
+ router = litellm.Router(
+ model_list=[
+ {
+ "model_name": "gpt-4",
+ "litellm_params": {
+ "model": "gpt-4o",
+ "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+ "tags": ["free"],
+ "mock_response": ["1", "2", "3"],
+ },
+ "model_info": {"id": "very-cheap-model"},
+ },
+ {
+ "model_name": "gpt-4",
+ "litellm_params": {
+ "model": "gpt-4o-mini",
+ "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+ "tags": ["paid"],
+ "mock_response": ["1", "2", "3"],
+ },
+ "model_info": {"id": "very-expensive-model"},
+ },
+ ],
+ enable_tag_filtering=True,
+ )
+
+ for _ in range(1):
+ # this should pick model with id == very-cheap-model
+ response = await router.aembedding(
+ model="gpt-4",
+ input="Tell me a joke.",
+ metadata={"tags": ["free"]},
+ )
+
+ print("Response: ", response)
+
+ response_extra_info = response._hidden_params
+ print("response_extra_info: ", response_extra_info)
+
+ assert response_extra_info["model_id"] == "very-cheap-model"
+
+ for _ in range(5):
+        # this should pick model with id == very-expensive-model
+ response = await router.aembedding(
+ model="gpt-4",
+ input="Tell me a joke.",
+ metadata={"tags": ["paid"]},
+ )
+
+ print("Response: ", response)
+
+ response_extra_info = response._hidden_params
+ print("response_extra_info: ", response_extra_info)
+
+ assert response_extra_info["model_id"] == "very-expensive-model"
+
+
@pytest.mark.asyncio()
async def test_default_tagged_deployments():
"""
diff --git a/tests/local_testing/test_tpm_rpm_routing_v2.py b/tests/local_testing/test_tpm_rpm_routing_v2.py
index 879e8ee5dd..a7073b4acd 100644
--- a/tests/local_testing/test_tpm_rpm_routing_v2.py
+++ b/tests/local_testing/test_tpm_rpm_routing_v2.py
@@ -377,6 +377,7 @@ async def test_multiple_potential_deployments(sync_mode):
deployment = await router.async_get_available_deployment(
model="azure-model",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
+ request_kwargs={},
)
## get id ##