diff --git a/litellm/integrations/langfuse/langfuse_prompt_management.py b/litellm/integrations/langfuse/langfuse_prompt_management.py
index cc2a6cf80d..1f4ca84db3 100644
--- a/litellm/integrations/langfuse/langfuse_prompt_management.py
+++ b/litellm/integrations/langfuse/langfuse_prompt_management.py
@@ -40,6 +40,7 @@ in_memory_dynamic_logger_cache = DynamicLoggingCache()
 def langfuse_client_init(
     langfuse_public_key=None,
     langfuse_secret=None,
+    langfuse_secret_key=None,
     langfuse_host=None,
     flush_interval=1,
 ) -> LangfuseClass:
@@ -67,7 +68,10 @@ def langfuse_client_init(
     )
 
     # Instance variables
-    secret_key = langfuse_secret or os.getenv("LANGFUSE_SECRET_KEY")
+
+    secret_key = (
+        langfuse_secret or langfuse_secret_key or os.getenv("LANGFUSE_SECRET_KEY")
+    )
     public_key = langfuse_public_key or os.getenv("LANGFUSE_PUBLIC_KEY")
     langfuse_host = langfuse_host or os.getenv(
         "LANGFUSE_HOST", "https://cloud.langfuse.com"
@@ -190,6 +194,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
         langfuse_client = langfuse_client_init(
             langfuse_public_key=dynamic_callback_params.get("langfuse_public_key"),
             langfuse_secret=dynamic_callback_params.get("langfuse_secret"),
+            langfuse_secret_key=dynamic_callback_params.get("langfuse_secret_key"),
             langfuse_host=dynamic_callback_params.get("langfuse_host"),
         )
         langfuse_prompt_client = self._get_prompt_from_id(
@@ -206,6 +211,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
         langfuse_client = langfuse_client_init(
             langfuse_public_key=dynamic_callback_params.get("langfuse_public_key"),
             langfuse_secret=dynamic_callback_params.get("langfuse_secret"),
+            langfuse_secret_key=dynamic_callback_params.get("langfuse_secret_key"),
             langfuse_host=dynamic_callback_params.get("langfuse_host"),
         )
         langfuse_prompt_client = self._get_prompt_from_id(
diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html
deleted file mode 100644
index 3b4d5d430f..0000000000
--- a/litellm/proxy/_experimental/out/onboarding.html
+++ /dev/null
@@ -1 +0,0 @@
-LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index ba355c0dc4..c01d2e7751 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,11 +1,9 @@
 model_list:
-  - model_name: openai-gpt-4o-mini-2024-07-18
+  - model_name: my-langfuse-model
     litellm_params:
-      model: openai/gpt-4o-mini-2024-07-18
-      configurable_clientside_auth_params: ["api_key"]
-      api_key: "my-bad-key"
-  # - model_name: openai-fallback-model
-  #   litellm_params:
-  #     model: openai/gpt-3.5-turbo
-
-
+      model: langfuse/openai-model
+      api_key: os.environ/OPENAI_API_KEY
+  - model_name: openai-model
+    litellm_params:
+      model: openai/gpt-3.5-turbo
+      api_key: os.environ/OPENAI_API_KEY
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 6a427fed8a..ba27de78be 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -1003,6 +1003,7 @@ class AddTeamCallback(LiteLLMPydanticObjectBase):
 class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
     success_callback: Optional[List[str]] = []
     failure_callback: Optional[List[str]] = []
+    callbacks: Optional[List[str]] = []  # for now - only supported for langfuse
     callback_vars: Optional[Dict[str, str]] = {}
@@ -1015,6 +1016,9 @@ class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
         failure_callback = values.get("failure_callback", [])
         if failure_callback is None:
             values.pop("failure_callback", None)
+        callbacks = values.get("callbacks", [])
+        if callbacks is None:
+            values.pop("callbacks", None)
         callback_vars = values.get("callback_vars", {})
         if callback_vars is None:
@@ -1023,6 +1027,7 @@ class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
             return {
                 "success_callback": [],
                 "failure_callback": [],
+                "callbacks": [],
                 "callback_vars": {},
             }
         valid_keys = set(StandardCallbackDynamicParams.__annotations__.keys())
diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index c15bbbd4a7..693e44ac77 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -102,11 +102,15 @@ def convert_key_logging_metadata_to_callback(
         if data.callback_name not in team_callback_settings_obj.failure_callback:
             team_callback_settings_obj.failure_callback.append(data.callback_name)
-    elif data.callback_type == "success_and_failure":
+    elif (
+        not data.callback_type or data.callback_type == "success_and_failure"
+    ):  # assume 'success_and_failure' = litellm.callbacks
         if team_callback_settings_obj.success_callback is None:
             team_callback_settings_obj.success_callback = []
         if team_callback_settings_obj.failure_callback is None:
             team_callback_settings_obj.failure_callback = []
+        if team_callback_settings_obj.callbacks is None:
+            team_callback_settings_obj.callbacks = []
 
         if data.callback_name not in team_callback_settings_obj.success_callback:
             team_callback_settings_obj.success_callback.append(data.callback_name)
@@ -114,6 +118,9 @@ def convert_key_logging_metadata_to_callback(
         if data.callback_name not in team_callback_settings_obj.failure_callback:
             team_callback_settings_obj.failure_callback.append(data.callback_name)
 
+        if data.callback_name not in team_callback_settings_obj.callbacks:
+            team_callback_settings_obj.callbacks.append(data.callback_name)
+
     for var, value in data.callback_vars.items():
         if team_callback_settings_obj.callback_vars is None:
             team_callback_settings_obj.callback_vars = {}
diff --git a/litellm/router.py b/litellm/router.py
index 880950b227..8b0bbd6b9f 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -1750,6 +1750,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "prompt"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -1863,6 +1864,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "prompt"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -1961,6 +1963,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "prompt"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_before_fallbacks(model=model, kwargs=kwargs)
             data = deployment["litellm_params"].copy()
@@ -2036,6 +2039,7 @@ class Router:
             deployment = await self.async_get_available_deployment(
                 model=model,
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
             data = deployment["litellm_params"].copy()
@@ -2080,6 +2084,7 @@ class Router:
                 model=model,
                 messages=messages,
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             data = deployment["litellm_params"].copy()
@@ -2185,6 +2190,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": prompt}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -2283,6 +2289,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "default text"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -2452,6 +2459,7 @@ class Router:
                 model=model,
                 input=input,
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
             data = deployment["litellm_params"].copy()
@@ -2549,6 +2557,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "files-api-fake-text"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
@@ -2654,6 +2663,7 @@ class Router:
                 model=model,
                 messages=[{"role": "user", "content": "files-api-fake-text"}],
                 specific_deployment=kwargs.pop("specific_deployment", None),
+                request_kwargs=kwargs,
             )
             metadata_variable_name = _get_router_metadata_variable_name(
                 function_name="_acreate_batch"
             )
@@ -2850,7 +2860,8 @@ class Router:
     ):
         if kwargs.get("model") and self.get_model_list(model_name=kwargs["model"]):
             deployment = await self.async_get_available_deployment(
-                model=kwargs["model"]
+                model=kwargs["model"],
+                request_kwargs=kwargs,
             )
             kwargs["model"] = deployment["litellm_params"]["model"]
             return await original_function(**kwargs)
@@ -5590,10 +5601,10 @@ class Router:
     async def async_get_available_deployment(
         self,
         model: str,
+        request_kwargs: Dict,
         messages: Optional[List[Dict[str, str]]] = None,
         input: Optional[Union[str, List]] = None,
         specific_deployment: Optional[bool] = False,
-        request_kwargs: Optional[Dict] = None,
     ):
         """
         Async implementation of 'get_available_deployments'.
diff --git a/litellm/utils.py b/litellm/utils.py
index 495b0d45a6..cbd5e2d0d3 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -451,6 +451,15 @@ def get_applied_guardrails(kwargs: Dict[str, Any]) -> List[str]:
     return applied_guardrails
 
 
+def get_dynamic_callbacks(
+    dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]]
+) -> List:
+    returned_callbacks = litellm.callbacks.copy()
+    if dynamic_callbacks:
+        returned_callbacks.extend(dynamic_callbacks)  # type: ignore
+    return returned_callbacks
+
+
 def function_setup(  # noqa: PLR0915
     original_function: str, rules_obj, start_time, *args, **kwargs
 ):  # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
@@ -475,12 +484,18 @@ def function_setup(  # noqa: PLR0915
         ## LOGGING SETUP
         function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None
 
-        if len(litellm.callbacks) > 0:
-            for callback in litellm.callbacks:
+        ## DYNAMIC CALLBACKS ##
+        dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = (
+            kwargs.pop("callbacks", None)
+        )
+        all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks)
+
+        if len(all_callbacks) > 0:
+            for callback in all_callbacks:
                 # check if callback is a string - e.g. "lago", "openmeter"
"lago", "openmeter" if isinstance(callback, str): callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class( # type: ignore - callback, internal_usage_cache=None, llm_router=None + callback, internal_usage_cache=None, llm_router=None # type: ignore ) if callback is None or any( isinstance(cb, type(callback)) diff --git a/tests/local_testing/test_least_busy_routing.py b/tests/local_testing/test_least_busy_routing.py index c9c6eb6093..cf69f596d9 100644 --- a/tests/local_testing/test_least_busy_routing.py +++ b/tests/local_testing/test_least_busy_routing.py @@ -119,7 +119,7 @@ async def test_router_get_available_deployments(async_test): if async_test is True: await router.cache.async_set_cache(key=cache_key, value=request_count_dict) deployment = await router.async_get_available_deployment( - model=model_group, messages=None + model=model_group, messages=None, request_kwargs={} ) else: router.cache.set_cache(key=cache_key, value=request_count_dict) diff --git a/tests/local_testing/test_router_get_deployments.py b/tests/local_testing/test_router_get_deployments.py index d57ef0b81d..efbb5d16e7 100644 --- a/tests/local_testing/test_router_get_deployments.py +++ b/tests/local_testing/test_router_get_deployments.py @@ -569,7 +569,7 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list): # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time for _ in range(1000): selected_model = await router.async_get_available_deployment( - "gpt-3.5-turbo" + "gpt-3.5-turbo", request_kwargs={} ) selected_model_id = selected_model["litellm_params"]["model"] selected_model_name = selected_model_id diff --git a/tests/local_testing/test_router_tag_routing.py b/tests/local_testing/test_router_tag_routing.py index 47f2a0a8f9..4e30e1d8b6 100644 --- a/tests/local_testing/test_router_tag_routing.py +++ b/tests/local_testing/test_router_tag_routing.py @@ -26,11 +26,6 @@ import litellm from litellm import Router from litellm._logging import verbose_logger -verbose_logger.setLevel(logging.DEBUG) - - -load_dotenv() - @pytest.mark.asyncio() async def test_router_free_paid_tier(): @@ -93,6 +88,69 @@ async def test_router_free_paid_tier(): assert response_extra_info["model_id"] == "very-expensive-model" +@pytest.mark.asyncio() +async def test_router_free_paid_tier_embeddings(): + """ + Pass list of orgs in 1 model definition, + expect a unique deployment for each to be created + """ + router = litellm.Router( + model_list=[ + { + "model_name": "gpt-4", + "litellm_params": { + "model": "gpt-4o", + "api_base": "https://exampleopenaiendpoint-production.up.railway.app/", + "tags": ["free"], + "mock_response": ["1", "2", "3"], + }, + "model_info": {"id": "very-cheap-model"}, + }, + { + "model_name": "gpt-4", + "litellm_params": { + "model": "gpt-4o-mini", + "api_base": "https://exampleopenaiendpoint-production.up.railway.app/", + "tags": ["paid"], + "mock_response": ["1", "2", "3"], + }, + "model_info": {"id": "very-expensive-model"}, + }, + ], + enable_tag_filtering=True, + ) + + for _ in range(1): + # this should pick model with id == very-cheap-model + response = await router.aembedding( + model="gpt-4", + input="Tell me a joke.", + metadata={"tags": ["free"]}, + ) + + print("Response: ", response) + + response_extra_info = response._hidden_params + print("response_extra_info: ", response_extra_info) + + assert response_extra_info["model_id"] == "very-cheap-model" + + for _ in range(5): + # this should pick model with id == very-cheap-model + 
response = await router.aembedding( + model="gpt-4", + input="Tell me a joke.", + metadata={"tags": ["paid"]}, + ) + + print("Response: ", response) + + response_extra_info = response._hidden_params + print("response_extra_info: ", response_extra_info) + + assert response_extra_info["model_id"] == "very-expensive-model" + + @pytest.mark.asyncio() async def test_default_tagged_deployments(): """ diff --git a/tests/local_testing/test_tpm_rpm_routing_v2.py b/tests/local_testing/test_tpm_rpm_routing_v2.py index 879e8ee5dd..a7073b4acd 100644 --- a/tests/local_testing/test_tpm_rpm_routing_v2.py +++ b/tests/local_testing/test_tpm_rpm_routing_v2.py @@ -377,6 +377,7 @@ async def test_multiple_potential_deployments(sync_mode): deployment = await router.async_get_available_deployment( model="azure-model", messages=[{"role": "user", "content": "Hey, how's it going?"}], + request_kwargs={}, ) ## get id ##
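Note: a rough usage sketch of what the changes above enable, not part of the diff itself; the model name and key values are placeholders:

    import litellm

    # "callbacks" is popped from kwargs by function_setup() and merged with
    # litellm.callbacks via the new get_dynamic_callbacks() helper
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
        callbacks=["langfuse"],
        # dynamic Langfuse credentials; langfuse_client_init() now accepts
        # langfuse_secret_key as an alias for langfuse_secret
        langfuse_public_key="pk-lf-...",
        langfuse_secret_key="sk-lf-...",
    )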