Support temporary budget increases on keys (#7754)

* fix(gpt_transformation.py): fix response_format translation check for 4o models

  Fixes https://github.com/BerriAI/litellm/issues/7616

* feat(key_management_endpoints.py): support 'temp_budget_increase' and 'temp_budget_expiry' fields

  Allow proxy admin to grant temporary budget increases to keys

* fix(proxy/_types.py): enforce temp_budget_increase and temp_budget_expiry are always passed together

* feat(user_api_key_auth.py): initial working temp budget increase logic

  ensures key budget exceeded error checks for temp budget in key metadata

* feat(proxy_server.py): return the key max budget and key spend in the response headers

  Allows clientside user to know their remaining limits

* test: add unit testing for new proxy utils

  Ensures new key budget is correctly handled

* docs(temporary_budget_increase.md): add doc on temporary budget increase

* fix(utils.py): remove 3.5 from response_format check for now

  not all azure 3.5 models support response_format

* fix(user_api_key_auth.py): return valid user api key auth object on all paths
2025-04-27 03:34:10 +00:00 · 2025-01-14 17:03:11 -08:00 · 2025-01-14 17:03:11 -08:00 · d7a13ad561
commit d7a13ad561
parent 000d3152a8
11 changed files with 259 additions and 52 deletions
--- a/tests/llm_translation/test_azure_openai.py
+++ b/tests/llm_translation/test_azure_openai.py
@ -248,3 +248,38 @@ def test_get_azure_ad_token_from_username_password(

    # Verify the result is the mock token provider
    assert result == mock_token_provider
+
+
+def test_azure_openai_gpt_4o_naming(monkeypatch):
+    from openai import AzureOpenAI
+    from pydantic import BaseModel, Field
+
+    monkeypatch.setenv("AZURE_API_VERSION", "2024-10-21")
+
+    client = AzureOpenAI(
+        api_key="test-api-key",
+        base_url="https://my-endpoint-sweden-berri992.openai.azure.com",
+        api_version="2023-12-01-preview",
+    )
+
+    class ResponseFormat(BaseModel):
+
+        number: str = Field(description="total number of days in a week")
+        days: list[str] = Field(description="name of days in a week")
+
+    with patch.object(client.chat.completions.with_raw_response, "create") as mock_post:
+        try:
+            completion(
+                model="azure/gpt4o",
+                messages=[{"role": "user", "content": "Hello world"}],
+                response_format=ResponseFormat,
+                client=client,
+            )
+        except Exception as e:
+            print(e)
+
+        mock_post.assert_called_once()
+
+        print(mock_post.call_args.kwargs)
+
+        assert "tool_calls" not in mock_post.call_args.kwargs