Compare commits

main..v1.67.3.dev1

No commits in common. "main" and "v1.67.3.dev1" have entirely different histories.

17 changed files with 42 additions and 297 deletions

View file

@@ -108,13 +108,7 @@ class ProxyBaseLLMRequestProcessing:
         user_api_key_dict: UserAPIKeyAuth,
         proxy_logging_obj: ProxyLogging,
         proxy_config: ProxyConfig,
-        route_type: Literal[
-            "acompletion",
-            "aresponses",
-            "_arealtime",
-            "aget_responses",
-            "adelete_responses",
-        ],
+        route_type: Literal["acompletion", "aresponses", "_arealtime"],
         version: Optional[str] = None,
         user_model: Optional[str] = None,
         user_temperature: Optional[float] = None,
@@ -184,13 +178,7 @@ class ProxyBaseLLMRequestProcessing:
         request: Request,
         fastapi_response: Response,
         user_api_key_dict: UserAPIKeyAuth,
-        route_type: Literal[
-            "acompletion",
-            "aresponses",
-            "_arealtime",
-            "aget_responses",
-            "adelete_responses",
-        ],
+        route_type: Literal["acompletion", "aresponses", "_arealtime"],
         proxy_logging_obj: ProxyLogging,
         general_settings: dict,
         proxy_config: ProxyConfig,

View file

@@ -1,5 +1,5 @@
 import json
-from typing import Any, Dict, List, Optional
+from typing import Dict, List, Optional

 import orjson
 from fastapi import Request, UploadFile, status
@@ -147,10 +147,10 @@ def check_file_size_under_limit(
     if llm_router is not None and request_data["model"] in router_model_names:
         try:
-            deployment: Optional[Deployment] = (
-                llm_router.get_deployment_by_model_group_name(
-                    model_group_name=request_data["model"]
-                )
-            )
+            deployment: Optional[
+                Deployment
+            ] = llm_router.get_deployment_by_model_group_name(
+                model_group_name=request_data["model"]
+            )
             if (
                 deployment
@@ -185,23 +185,3 @@ def check_file_size_under_limit(
             )

     return True
-
-
-async def get_form_data(request: Request) -> Dict[str, Any]:
-    """
-    Read form data from request
-
-    Handles when OpenAI SDKs pass form keys as `timestamp_granularities[]="word"` instead of `timestamp_granularities=["word", "sentence"]`
-    """
-    form = await request.form()
-    form_data = dict(form)
-    parsed_form_data: dict[str, Any] = {}
-    for key, value in form_data.items():
-        # OpenAI SDKs pass form keys as `timestamp_granularities[]="word"` instead of `timestamp_granularities=["word", "sentence"]`
-        if key.endswith("[]"):
-            clean_key = key[:-2]
-            parsed_form_data.setdefault(clean_key, []).append(value)
-        else:
-            parsed_form_data[key] = value
-    return parsed_form_data
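The `get_form_data` helper removed above normalizes OpenAI-SDK-style array keys such as `timestamp_granularities[]` into plain lists before the proxy forwards the request. A minimal standalone sketch of that normalization, assuming a plain dict input (the function name here is illustrative; the real helper is async and reads Starlette's `request.form()` multidict):

```python
from typing import Any, Dict


def normalize_form_keys(form_data: Dict[str, Any]) -> Dict[str, Any]:
    """Collect keys ending in '[]' into lists; pass everything else through."""
    parsed: Dict[str, Any] = {}
    for key, value in form_data.items():
        if key.endswith("[]"):
            parsed.setdefault(key[:-2], []).append(value)
        else:
            parsed[key] = value
    return parsed


print(normalize_form_keys({"model": "gpt-4o-transcribe", "timestamp_granularities[]": "word"}))
# -> {'model': 'gpt-4o-transcribe', 'timestamp_granularities': ['word']}
```

As the deleted unit test further down notes, converting the form to a plain dict keeps only the last value for a repeated `[]` key; a true multidict would preserve both.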

View file

@@ -1,8 +1,16 @@
 model_list:
-  - model_name: openai/*
+  - model_name: azure-computer-use-preview
     litellm_params:
-      model: openai/*
-      api_key: os.environ/OPENAI_API_KEY
+      model: azure/computer-use-preview
+      api_key: mock-api-key
+      api_version: mock-api-version
+      api_base: https://mock-endpoint.openai.azure.com
+  - model_name: azure-computer-use-preview
+    litellm_params:
+      model: azure/computer-use-preview-2
+      api_key: mock-api-key-2
+      api_version: mock-api-version-2
+      api_base: https://mock-endpoint-2.openai.azure.com

 router_settings:
   optional_pre_call_checks: ["responses_api_deployment_check"]

View file

@@ -179,7 +179,6 @@ from litellm.proxy.common_utils.html_forms.ui_login import html_form
 from litellm.proxy.common_utils.http_parsing_utils import (
     _read_request_body,
     check_file_size_under_limit,
-    get_form_data,
 )
 from litellm.proxy.common_utils.load_config_utils import (
     get_config_file_contents_from_gcs,
@@ -805,9 +804,9 @@ model_max_budget_limiter = _PROXY_VirtualKeyModelMaxBudgetLimiter(
     dual_cache=user_api_key_cache
 )
 litellm.logging_callback_manager.add_litellm_callback(model_max_budget_limiter)
-redis_usage_cache: Optional[RedisCache] = (
-    None  # redis cache used for tracking spend, tpm/rpm limits
-)
+redis_usage_cache: Optional[
+    RedisCache
+] = None  # redis cache used for tracking spend, tpm/rpm limits
 user_custom_auth = None
 user_custom_key_generate = None
 user_custom_sso = None
@@ -1133,9 +1132,9 @@ async def update_cache( # noqa: PLR0915
         _id = "team_id:{}".format(team_id)
         try:
             # Fetch the existing cost for the given user
-            existing_spend_obj: Optional[LiteLLM_TeamTable] = (
-                await user_api_key_cache.async_get_cache(key=_id)
-            )
+            existing_spend_obj: Optional[
+                LiteLLM_TeamTable
+            ] = await user_api_key_cache.async_get_cache(key=_id)
             if existing_spend_obj is None:
                 # do nothing if team not in api key cache
                 return
@@ -2807,9 +2806,9 @@ async def initialize( # noqa: PLR0915
         user_api_base = api_base
         dynamic_config[user_model]["api_base"] = api_base
     if api_version:
-        os.environ["AZURE_API_VERSION"] = (
-            api_version  # set this for azure - litellm can read this from the env
-        )
+        os.environ[
+            "AZURE_API_VERSION"
+        ] = api_version  # set this for azure - litellm can read this from the env
     if max_tokens:  # model-specific param
         dynamic_config[user_model]["max_tokens"] = max_tokens
     if temperature:  # model-specific param
@@ -4121,7 +4120,7 @@ async def audio_transcriptions(
     data: Dict = {}
     try:
         # Use orjson to parse JSON data, orjson speeds up requests significantly
-        form_data = await get_form_data(request)
+        form_data = await request.form()
         data = {key: value for key, value in form_data.items() if key != "file"}

         # Include original request and headers in the data
@@ -7759,9 +7758,9 @@ async def get_config_list(
                     hasattr(sub_field_info, "description")
                     and sub_field_info.description is not None
                 ):
-                    nested_fields[idx].field_description = (
-                        sub_field_info.description
-                    )
+                    nested_fields[
+                        idx
+                    ].field_description = sub_field_info.description
                 idx += 1

             _stored_in_db = None

View file

@@ -106,50 +106,8 @@ async def get_response(
         -H "Authorization: Bearer sk-1234"
     ```
     """
-    from litellm.proxy.proxy_server import (
-        _read_request_body,
-        general_settings,
-        llm_router,
-        proxy_config,
-        proxy_logging_obj,
-        select_data_generator,
-        user_api_base,
-        user_max_tokens,
-        user_model,
-        user_request_timeout,
-        user_temperature,
-        version,
-    )
-    data = await _read_request_body(request=request)
-    data["response_id"] = response_id
-    processor = ProxyBaseLLMRequestProcessing(data=data)
-    try:
-        return await processor.base_process_llm_request(
-            request=request,
-            fastapi_response=fastapi_response,
-            user_api_key_dict=user_api_key_dict,
-            route_type="aget_responses",
-            proxy_logging_obj=proxy_logging_obj,
-            llm_router=llm_router,
-            general_settings=general_settings,
-            proxy_config=proxy_config,
-            select_data_generator=select_data_generator,
-            model=None,
-            user_model=user_model,
-            user_temperature=user_temperature,
-            user_request_timeout=user_request_timeout,
-            user_max_tokens=user_max_tokens,
-            user_api_base=user_api_base,
-            version=version,
-        )
-    except Exception as e:
-        raise await processor._handle_llm_api_exception(
-            e=e,
-            user_api_key_dict=user_api_key_dict,
-            proxy_logging_obj=proxy_logging_obj,
-            version=version,
-        )
+    # TODO: Implement response retrieval logic
+    pass


 @router.delete(
@@ -178,50 +136,8 @@ async def delete_response(
         -H "Authorization: Bearer sk-1234"
     ```
     """
-    from litellm.proxy.proxy_server import (
-        _read_request_body,
-        general_settings,
-        llm_router,
-        proxy_config,
-        proxy_logging_obj,
-        select_data_generator,
-        user_api_base,
-        user_max_tokens,
-        user_model,
-        user_request_timeout,
-        user_temperature,
-        version,
-    )
-    data = await _read_request_body(request=request)
-    data["response_id"] = response_id
-    processor = ProxyBaseLLMRequestProcessing(data=data)
-    try:
-        return await processor.base_process_llm_request(
-            request=request,
-            fastapi_response=fastapi_response,
-            user_api_key_dict=user_api_key_dict,
-            route_type="adelete_responses",
-            proxy_logging_obj=proxy_logging_obj,
-            llm_router=llm_router,
-            general_settings=general_settings,
-            proxy_config=proxy_config,
-            select_data_generator=select_data_generator,
-            model=None,
-            user_model=user_model,
-            user_temperature=user_temperature,
-            user_request_timeout=user_request_timeout,
-            user_max_tokens=user_max_tokens,
-            user_api_base=user_api_base,
-            version=version,
-        )
-    except Exception as e:
-        raise await processor._handle_llm_api_exception(
-            e=e,
-            user_api_key_dict=user_api_key_dict,
-            proxy_logging_obj=proxy_logging_obj,
-            version=version,
-        )
+    # TODO: Implement response deletion logic
+    pass


 @router.get(
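For reference, the retrieve/delete routes gutted above are exercised end to end in the test file further down; the same flow can be driven with the OpenAI SDK pointed at the proxy. A sketch, assuming a locally running proxy at `http://localhost:4000` with the `sk-1234` key from the docstring curl examples and the `azure-computer-use-preview` model group from the config above:

```python
from openai import OpenAI

# Base URL and key are assumptions matching the curl examples in the docstrings.
client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

created = client.responses.create(
    model="azure-computer-use-preview", input="just respond with the word 'ping'"
)
fetched = client.responses.retrieve(created.id)  # GET /v1/responses/{response_id}
deleted = client.responses.delete(created.id)    # DELETE /v1/responses/{response_id}
print(fetched.id, deleted)
```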

View file

@@ -47,8 +47,6 @@ async def route_request(
         "amoderation",
         "arerank",
         "aresponses",
-        "aget_responses",
-        "adelete_responses",
         "_arealtime",  # private function for realtime API
     ],
 ):

View file

@@ -176,16 +176,6 @@ class ResponsesAPIRequestUtils:
             response_id=response_id,
         )

-    @staticmethod
-    def get_model_id_from_response_id(response_id: Optional[str]) -> Optional[str]:
-        """Get the model_id from the response_id"""
-        if response_id is None:
-            return None
-        decoded_response_id = (
-            ResponsesAPIRequestUtils._decode_responses_api_response_id(response_id)
-        )
-        return decoded_response_id.get("model_id") or None
-

 class ResponseAPILoggingUtils:
     @staticmethod

View file

@@ -739,12 +739,6 @@ class Router:
             litellm.afile_content, call_type="afile_content"
         )
         self.responses = self.factory_function(litellm.responses, call_type="responses")
-        self.aget_responses = self.factory_function(
-            litellm.aget_responses, call_type="aget_responses"
-        )
-        self.adelete_responses = self.factory_function(
-            litellm.adelete_responses, call_type="adelete_responses"
-        )

     def validate_fallbacks(self, fallback_param: Optional[List]):
         """
@@ -3087,8 +3081,6 @@ class Router:
             "anthropic_messages",
             "aresponses",
             "responses",
-            "aget_responses",
-            "adelete_responses",
             "afile_delete",
             "afile_content",
         ] = "assistants",
@@ -3143,11 +3135,6 @@ class Router:
                     original_function=original_function,
                     **kwargs,
                 )
-            elif call_type in ("aget_responses", "adelete_responses"):
-                return await self._init_responses_api_endpoints(
-                    original_function=original_function,
-                    **kwargs,
-                )
             elif call_type in ("afile_delete", "afile_content"):
                 return await self._ageneric_api_call_with_fallbacks(
                     original_function=original_function,
@@ -3158,28 +3145,6 @@ class Router:
         return async_wrapper

-    async def _init_responses_api_endpoints(
-        self,
-        original_function: Callable,
-        **kwargs,
-    ):
-        """
-        Initialize the Responses API endpoints on the router.
-
-        GET, DELETE Responses API Requests encode the model_id in the response_id, this function decodes the response_id and sets the model to the model_id.
-        """
-        from litellm.responses.utils import ResponsesAPIRequestUtils
-
-        model_id = ResponsesAPIRequestUtils.get_model_id_from_response_id(
-            kwargs.get("response_id")
-        )
-        if model_id is not None:
-            kwargs["model"] = model_id
-        return await self._ageneric_api_call_with_fallbacks(
-            original_function=original_function,
-            **kwargs,
-        )
-

     async def _pass_through_assistants_endpoint_factory(
         self,
         original_function: Callable,
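The removed `_init_responses_api_endpoints` hook relies on the fact that the proxy encodes the deployment's `model_id` inside the `response_id` it returns, so GET/DELETE calls can be pinned back to the deployment that created the response. A rough standalone sketch of that decode-then-pin idea; the base64/JSON encoding below is an illustrative stand-in, not litellm's actual `_decode_responses_api_response_id` format:

```python
import base64
import json
from typing import Any, Dict, Optional


def encode_response_id(model_id: str, upstream_response_id: str) -> str:
    # Illustrative encoding only: pack the deployment id alongside the upstream id.
    payload = json.dumps({"model_id": model_id, "response_id": upstream_response_id})
    return base64.urlsafe_b64encode(payload.encode()).decode()


def get_model_id_from_response_id(response_id: Optional[str]) -> Optional[str]:
    if response_id is None:
        return None
    try:
        decoded = json.loads(base64.urlsafe_b64decode(response_id.encode()))
    except Exception:
        return None
    return decoded.get("model_id") or None


def pin_to_deployment(**kwargs: Any) -> Dict[str, Any]:
    # Mirrors the removed router hook: if the response_id names a deployment,
    # override "model" so the call routes back to that same deployment.
    model_id = get_model_id_from_response_id(kwargs.get("response_id"))
    if model_id is not None:
        kwargs["model"] = model_id
    return kwargs


rid = encode_response_id("model-id-123", "resp_abc")
print(pin_to_deployment(response_id=rid, model="azure-computer-use-preview"))
```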

View file

@@ -7058,17 +7058,6 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models",
         "supports_tool_choice": true
     },
-    "command-a-03-2025": {
-        "max_tokens": 8000,
-        "max_input_tokens": 256000,
-        "max_output_tokens": 8000,
-        "input_cost_per_token": 0.0000025,
-        "output_cost_per_token": 0.00001,
-        "litellm_provider": "cohere_chat",
-        "mode": "chat",
-        "supports_function_calling": true,
-        "supports_tool_choice": true
-    },
     "command-r": {
         "max_tokens": 4096,
         "max_input_tokens": 128000,

View file

@@ -18,7 +18,6 @@ from litellm.proxy.common_utils.http_parsing_utils import (
     _read_request_body,
     _safe_get_request_parsed_body,
     _safe_set_request_parsed_body,
-    get_form_data,
 )
@@ -148,53 +147,3 @@ async def test_circular_reference_handling():
     assert (
         "proxy_server_request" not in result2
     )  # This will pass, showing the cache pollution
-
-
-@pytest.mark.asyncio
-async def test_get_form_data():
-    """
-    Test that get_form_data correctly handles form data with array notation.
-    Tests audio transcription parameters as a specific example.
-    """
-    # Create a mock request with transcription form data
-    mock_request = MagicMock()
-
-    # Create mock form data with array notation for timestamp_granularities
-    mock_form_data = {
-        "file": "file_object",  # In a real request this would be an UploadFile
-        "model": "gpt-4o-transcribe",
-        "include[]": "logprobs",  # Array notation
-        "language": "en",
-        "prompt": "Transcribe this audio file",
-        "response_format": "json",
-        "stream": "false",
-        "temperature": "0.2",
-        "timestamp_granularities[]": "word",  # First array item
-        "timestamp_granularities[]": "segment",  # Second array item (would overwrite in dict, but handled by the function)
-    }
-
-    # Mock the form method to return the test data
-    mock_request.form = AsyncMock(return_value=mock_form_data)
-
-    # Call the function being tested
-    result = await get_form_data(mock_request)
-
-    # Verify regular form fields are preserved
-    assert result["file"] == "file_object"
-    assert result["model"] == "gpt-4o-transcribe"
-    assert result["language"] == "en"
-    assert result["prompt"] == "Transcribe this audio file"
-    assert result["response_format"] == "json"
-    assert result["stream"] == "false"
-    assert result["temperature"] == "0.2"
-
-    # Verify array fields are correctly parsed
-    assert "include" in result
-    assert isinstance(result["include"], list)
-    assert "logprobs" in result["include"]
-    assert "timestamp_granularities" in result
-    assert isinstance(result["timestamp_granularities"], list)
-
-    # Note: In a real MultiDict, both values would be present
-    # But in our mock dictionary the second value overwrites the first
-    assert "segment" in result["timestamp_granularities"]

View file

@@ -947,8 +947,6 @@ class BaseLLMChatTest(ABC):
                 second_response.choices[0].message.content is not None
                 or second_response.choices[0].message.tool_calls is not None
             )
-        except litellm.ServiceUnavailableError:
-            pytest.skip("Model is overloaded")
         except litellm.InternalServerError:
             pytest.skip("Model is overloaded")
         except litellm.RateLimitError:

View file

@@ -73,31 +73,15 @@ def validate_stream_chunk(chunk):
 def test_basic_response():
     client = get_test_client()
     response = client.responses.create(
-        model="gpt-4.0", input="just respond with the word 'ping'"
+        model="gpt-4o", input="just respond with the word 'ping'"
     )
     print("basic response=", response)

-    # get the response
-    response = client.responses.retrieve(response.id)
-    print("GET response=", response)
-
-    # delete the response
-    delete_response = client.responses.delete(response.id)
-    print("DELETE response=", delete_response)
-
-    # try getting the response again, we should not get it back
-    get_response = client.responses.retrieve(response.id)
-    print("GET response after delete=", get_response)
-
-    with pytest.raises(Exception):
-        get_response = client.responses.retrieve(response.id)
-

 def test_streaming_response():
     client = get_test_client()
     stream = client.responses.create(
-        model="gpt-4.0", input="just respond with the word 'ping'", stream=True
+        model="gpt-4o", input="just respond with the word 'ping'", stream=True
     )

     collected_chunks = []
@@ -120,5 +104,5 @@ def test_bad_request_bad_param_error():
     with pytest.raises(BadRequestError):
         # Trigger error with invalid model name
         client.responses.create(
-            model="gpt-4.0", input="This should fail", temperature=2000
+            model="gpt-4o", input="This should fail", temperature=2000
         )

View file

@@ -1157,14 +1157,3 @@ def test_cached_get_model_group_info(model_list):
     # Verify the cache info shows hits
     cache_info = router._cached_get_model_group_info.cache_info()
     assert cache_info.hits > 0  # Should have at least one cache hit
-
-
-def test_init_responses_api_endpoints(model_list):
-    """Test if the '_init_responses_api_endpoints' function is working correctly"""
-    from typing import Callable
-
-    router = Router(model_list=model_list)
-
-    assert router.aget_responses is not None
-    assert isinstance(router.aget_responses, Callable)
-    assert router.adelete_responses is not None
-    assert isinstance(router.adelete_responses, Callable)

View file

@@ -151,7 +151,7 @@ export default function CreateKeyPage() {
     if (redirectToLogin) {
       window.location.href = (proxyBaseUrl || "") + "/sso/key/generate"
     }
-  }, [redirectToLogin])
+  }, [token, authLoading])

   useEffect(() => {
     if (!token) {
@@ -223,7 +223,7 @@ export default function CreateKeyPage() {
     }
   }, [accessToken, userID, userRole]);

-  if (authLoading || redirectToLogin) {
+  if (authLoading) {
     return <LoadingScreen />
   }

View file

@@ -450,8 +450,6 @@ export function AllKeysTable({
         columns={columns.filter(col => col.id !== 'expander') as any}
         data={filteredKeys as any}
         isLoading={isLoading}
-        loadingMessage="🚅 Loading keys..."
-        noDataMessage="No keys found"
         getRowCanExpand={() => false}
         renderSubComponent={() => <></>}
       />

View file

@@ -26,8 +26,6 @@ function DataTableWrapper({
       isLoading={isLoading}
       renderSubComponent={renderSubComponent}
       getRowCanExpand={getRowCanExpand}
-      loadingMessage="🚅 Loading tools..."
-      noDataMessage="No tools found"
     />
   );
 }

View file

@@ -26,8 +26,6 @@ interface DataTableProps<TData, TValue> {
   expandedRequestId?: string | null;
   onRowExpand?: (requestId: string | null) => void;
   setSelectedKeyIdInfoView?: (keyId: string | null) => void;
-  loadingMessage?: string;
-  noDataMessage?: string;
 }

 export function DataTable<TData extends { request_id: string }, TValue>({
@@ -38,8 +36,6 @@ export function DataTable<TData extends { request_id: string }, TValue>({
   isLoading = false,
   expandedRequestId,
   onRowExpand,
-  loadingMessage = "🚅 Loading logs...",
-  noDataMessage = "No logs found",
 }: DataTableProps<TData, TValue>) {
   const table = useReactTable({
     data,
@@ -118,7 +114,7 @@ export function DataTable<TData extends { request_id: string }, TValue>({
             <TableRow>
               <TableCell colSpan={columns.length} className="h-8 text-center">
                 <div className="text-center text-gray-500">
-                  <p>{loadingMessage}</p>
+                  <p>🚅 Loading logs...</p>
                 </div>
               </TableCell>
             </TableRow>
@@ -151,7 +147,7 @@ export function DataTable<TData extends { request_id: string }, TValue>({
             : <TableRow>
               <TableCell colSpan={columns.length} className="h-8 text-center">
                 <div className="text-center text-gray-500">
-                  <p>{noDataMessage}</p>
+                  <p>No logs found</p>
                 </div>
               </TableCell>
             </TableRow>