diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 4a1fc84a8..d25f1b946 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -19,6 +19,9 @@ model_list:
     litellm_params:
       model: mistral/mistral-small-latest
       api_key: "os.environ/MISTRAL_API_KEY"
+  - model_name: bedrock-anthropic
+    litellm_params:
+      model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
   - model_name: gemini-1.5-pro-001
     litellm_params:
       model: vertex_ai_beta/gemini-1.5-pro-001
@@ -40,3 +43,6 @@ general_settings:
 litellm_settings:
   fallbacks: [{"gemini-1.5-pro-001": ["gpt-4o"]}]
   callbacks: ["gcs_bucket"]
+  success_callback: ["langfuse"]
+  langfuse_default_tags: ["cache_hit", "cache_key", "user_api_key_alias", "user_api_key_team_alias"]
+  cache: True
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index b637bee21..689b0a3c5 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -187,6 +187,7 @@ from litellm.proxy.openai_files_endpoints.files_endpoints import set_files_confi
 from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
     initialize_pass_through_endpoints,
 )
+from litellm.proxy.route_llm_request import route_request
 from litellm.proxy.secret_managers.aws_secret_manager import (
     load_aws_kms,
     load_aws_secret_manager,
 )
@@ -3006,68 +3007,13 @@ async def chat_completion(

         ### ROUTE THE REQUEST ###
         # Do not change this - it should be a constant time fetch - ALWAYS
-        router_model_names = llm_router.model_names if llm_router is not None else []
-        # skip router if user passed their key
-        if "api_key" in data:
-            tasks.append(litellm.acompletion(**data))
-        elif "," in data["model"] and llm_router is not None:
-            if (
-                data.get("fastest_response", None) is not None
-                and data["fastest_response"] == True
-            ):
-                tasks.append(llm_router.abatch_completion_fastest_response(**data))
-            else:
-                _models_csv_string = data.pop("model")
-                _models = [model.strip() for model in _models_csv_string.split(",")]
-                tasks.append(llm_router.abatch_completion(models=_models, **data))
-        elif "user_config" in data:
-            # initialize a new router instance. make request using this Router
-            router_config = data.pop("user_config")
-            user_router = litellm.Router(**router_config)
-            tasks.append(user_router.acompletion(**data))
-        elif (
-            llm_router is not None and data["model"] in router_model_names
-        ):  # model in router model list
-            tasks.append(llm_router.acompletion(**data))
-        elif (
-            llm_router is not None and data["model"] in llm_router.get_model_ids()
-        ):  # model in router model list
-            tasks.append(llm_router.acompletion(**data))
-        elif (
-            llm_router is not None
-            and llm_router.model_group_alias is not None
-            and data["model"] in llm_router.model_group_alias
-        ):  # model set in model_group_alias
-            tasks.append(llm_router.acompletion(**data))
-        elif (
-            llm_router is not None and data["model"] in llm_router.deployment_names
-        ):  # model in router deployments, calling a specific deployment on the router
-            tasks.append(llm_router.acompletion(**data, specific_deployment=True))
-        elif (
-            llm_router is not None
-            and data["model"] not in router_model_names
-            and llm_router.router_general_settings.pass_through_all_models is True
-        ):
-            tasks.append(litellm.acompletion(**data))
-        elif (
-            llm_router is not None
-            and data["model"] not in router_model_names
-            and (
-                llm_router.default_deployment is not None
-                or len(llm_router.provider_default_deployments) > 0
-            )
-        ):  # model in router deployments, calling a specific deployment on the router
-            tasks.append(llm_router.acompletion(**data))
-        elif user_model is not None:  # `litellm --model `
-            tasks.append(litellm.acompletion(**data))
-        else:
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail={
-                    "error": "chat_completion: Invalid model name passed in model="
-                    + data.get("model", "")
-                },
-            )
+        llm_call = await route_request(
+            data=data,
+            route_type="acompletion",
+            llm_router=llm_router,
+            user_model=user_model,
+        )
+        tasks.append(llm_call)

         # wait for call to end
         llm_responses = asyncio.gather(
diff --git a/litellm/proxy/route_llm_request.py b/litellm/proxy/route_llm_request.py
new file mode 100644
index 000000000..d4af63b28
--- /dev/null
+++ b/litellm/proxy/route_llm_request.py
@@ -0,0 +1,103 @@
+from typing import TYPE_CHECKING, Any, Literal, Optional, Union
+
+from fastapi import (
+    Depends,
+    FastAPI,
+    File,
+    Form,
+    Header,
+    HTTPException,
+    Path,
+    Request,
+    Response,
+    UploadFile,
+    status,
+)
+
+import litellm
+from litellm._logging import verbose_logger
+
+if TYPE_CHECKING:
+    from litellm.router import Router as _Router
+
+    LitellmRouter = _Router
+else:
+    LitellmRouter = Any
+
+
+async def route_request(
+    data: dict,
+    llm_router: Optional[LitellmRouter],
+    user_model: Optional[str],
+    route_type: Literal[
+        "acompletion",
+        "atext_completion",
+        "aembedding",
+        "aimage_generation",
+        "aspeech",
+        "atranscription",
+        "amoderation",
+    ],
+):
+    """
+    Common helper to route the request
+
+    """
+    router_model_names = llm_router.model_names if llm_router is not None else []
+
+    if "api_key" in data:
+        return getattr(litellm, f"{route_type}")(**data)
+
+    elif "user_config" in data:
+        router_config = data.pop("user_config")
+        user_router = litellm.Router(**router_config)
+        return getattr(user_router, f"{route_type}")(**data)
+
+    elif (
+        "," in data.get("model", "")
+        and llm_router is not None
+        and route_type == "acompletion"
+    ):
+        if data.get("fastest_response", False):
+            return llm_router.abatch_completion_fastest_response(**data)
+        else:
+            models = [model.strip() for model in data.pop("model").split(",")]
+            return llm_router.abatch_completion(models=models, **data)
+
+    elif llm_router is not None:
+        if (
+            data["model"] in router_model_names
+            or data["model"] in llm_router.get_model_ids()
+        ):
+            return getattr(llm_router, f"{route_type}")(**data)
+
+        elif (
+            llm_router.model_group_alias is not None
+            and data["model"] in llm_router.model_group_alias
+        ):
+            return getattr(llm_router, f"{route_type}")(**data)
+
+        elif data["model"] in llm_router.deployment_names:
+            return getattr(llm_router, f"{route_type}")(
+                **data, specific_deployment=True
+            )
+
+        elif data["model"] not in router_model_names:
+            if llm_router.router_general_settings.pass_through_all_models:
+                return getattr(litellm, f"{route_type}")(**data)
+            elif (
+                llm_router.default_deployment is not None
+                or len(llm_router.provider_default_deployments) > 0
+            ):
+                return getattr(llm_router, f"{route_type}")(**data)
+
+    elif user_model is not None:
+        return getattr(litellm, f"{route_type}")(**data)
+
+    raise HTTPException(
+        status_code=status.HTTP_400_BAD_REQUEST,
+        detail={
+            "error": f"{route_type}: Invalid model name passed in model="
+            + data.get("model", "")
+        },
+    )