diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index 693e44ac77..91d198b601 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -62,10 +62,17 @@ def _get_metadata_variable_name(request: Request) -> str:
     """
     if RouteChecks._is_assistants_api_request(request):
         return "litellm_metadata"
-    if "batches" in request.url.path:
-        return "litellm_metadata"
-    if "/v1/messages" in request.url.path:
-        # anthropic API has a field called metadata
+    LITELLM_METADATA_ROUTES = [
+        "batches",
+        "/v1/messages",
+        "responses",
+    ]
+    if any(
+        [
+            litellm_metadata_route in request.url.path
+            for litellm_metadata_route in LITELLM_METADATA_ROUTES
+        ]
+    ):
         return "litellm_metadata"
     else:
         return "metadata"
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index b64bd84aad..c5add9ee09 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,10 +1,6 @@
 model_list:
-  - model_name: thinking-us.anthropic.claude-3-7-sonnet-20250219-v1:0
+  - model_name: gpt-4o
     litellm_params:
-      model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0
-      thinking: {"type": "enabled", "budget_tokens": 1024}
-      max_tokens: 1080
-      merge_reasoning_content_in_choices: true
-
+      model: gpt-4o
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index d39c5e8182..d866fe1e75 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -233,6 +233,7 @@ from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
     router as pass_through_router,
 )
 from litellm.proxy.rerank_endpoints.endpoints import router as rerank_router
+from litellm.proxy.response_api_endpoints.endpoints import router as response_router
 from litellm.proxy.route_llm_request import route_request
 from litellm.proxy.spend_tracking.spend_management_endpoints import (
     router as spend_management_router,
@@ -8390,6 +8391,7 @@ async def get_routes():
 
 
 app.include_router(router)
+app.include_router(response_router)
 app.include_router(batches_router)
 app.include_router(rerank_router)
 app.include_router(fine_tuning_router)
diff --git a/litellm/proxy/response_api_endpoints/endpoints.py b/litellm/proxy/response_api_endpoints/endpoints.py
index fb84793992..b3b5a8697b 100644
--- a/litellm/proxy/response_api_endpoints/endpoints.py
+++ b/litellm/proxy/response_api_endpoints/endpoints.py
@@ -5,7 +5,6 @@ from litellm._logging import verbose_proxy_logger
 from litellm.proxy._types import *
 from litellm.proxy.auth.user_api_key_auth import UserAPIKeyAuth, user_api_key_auth
 from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
-from litellm.proxy.proxy_server import _read_request_body, select_data_generator
 
 
 router = APIRouter()
@@ -44,10 +43,12 @@ async def responses_api(
     ```
     """
     from litellm.proxy.proxy_server import (
+        _read_request_body,
         general_settings,
         llm_router,
         proxy_config,
         proxy_logging_obj,
+        select_data_generator,
         user_api_base,
         user_max_tokens,
         user_model,
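
A quick sanity check of the refactored helper, as a minimal standalone sketch: the real function lives in `litellm/proxy/litellm_pre_call_utils.py` and takes a FastAPI `Request`, so a `SimpleNamespace` stands in for it here, and the assistants-API branch (`RouteChecks._is_assistants_api_request`) is omitted.

```python
# Minimal sketch of the refactored route matching; a SimpleNamespace
# stands in for the FastAPI Request, and the assistants-API check
# from the real helper is omitted.
from types import SimpleNamespace

LITELLM_METADATA_ROUTES = [
    "batches",
    "/v1/messages",  # the Anthropic API has its own "metadata" field
    "responses",
]


def get_metadata_variable_name(request) -> str:
    # These routes proxy APIs that define their own "metadata" field,
    # so proxy-internal data is stored under "litellm_metadata" instead.
    if any(route in request.url.path for route in LITELLM_METADATA_ROUTES):
        return "litellm_metadata"
    return "metadata"


for path, expected in [
    ("/v1/responses", "litellm_metadata"),
    ("/v1/messages", "litellm_metadata"),
    ("/chat/completions", "metadata"),
]:
    request = SimpleNamespace(url=SimpleNamespace(path=path))
    assert get_metadata_variable_name(request) == expected
```

Two details worth flagging in the list-based version: `any()` accepts a generator directly, so the inner list brackets in the diff are unnecessary (though harmless), and adjacent string literals in Python concatenate implicitly, so a missing comma in the route list would silently merge two entries into one path that never matches. Separately, the import shuffle in `response_api_endpoints/endpoints.py` mirrors the handler's existing pattern of importing from `proxy_server` inside the function body, which presumably avoids a circular import at module load.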