mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 03:34:10 +00:00
Support temporary budget increases on keys (#7754)
* fix(gpt_transformation.py): fix response_format translation check for 4o models
  Fixes https://github.com/BerriAI/litellm/issues/7616
* feat(key_management_endpoints.py): support 'temp_budget_increase' and 'temp_budget_expiry' fields
  Allow proxy admin to grant temporary budget increases to keys
* fix(proxy/_types.py): enforce temp_budget_increase and temp_budget_expiry are always passed together
* feat(user_api_key_auth.py): initial working temp budget increase logic
  ensures key budget exceeded error checks for temp budget in key metadata
* feat(proxy_server.py): return the key max budget and key spend in the response headers
  Allows clientside user to know their remaining limits
* test: add unit testing for new proxy utils
  Ensures new key budget is correctly handled
* docs(temporary_budget_increase.md): add doc on temporary budget increase
* fix(utils.py): remove 3.5 from response_format check for now
  not all azure 3.5 models support response_format
* fix(user_api_key_auth.py): return valid user api key auth object on all paths
This commit is contained in:
parent
000d3152a8
commit
d7a13ad561
11 changed files with 259 additions and 52 deletions
|
@ -248,3 +248,38 @@ def test_get_azure_ad_token_from_username_password(
|
|||
|
||||
# Verify the result is the mock token provider
|
||||
assert result == mock_token_provider
|
||||
|
||||
|
||||
def test_azure_openai_gpt_4o_naming(monkeypatch):
    """Inspect the request built for an Azure deployment named ``gpt4o``.

    Calls ``completion`` with a pydantic ``response_format`` through an
    ``AzureOpenAI`` client whose raw-response ``create`` method is patched,
    then checks the keyword arguments that reached the patched method.
    """
    from openai import AzureOpenAI
    from pydantic import BaseModel, Field

    monkeypatch.setenv("AZURE_API_VERSION", "2024-10-21")

    client = AzureOpenAI(
        api_key="test-api-key",
        base_url="https://my-endpoint-sweden-berri992.openai.azure.com",
        api_version="2023-12-01-preview",
    )

    # Structured-output schema handed to `completion` as `response_format`.
    class ResponseFormat(BaseModel):
        number: str = Field(description="total number of days in a week")
        days: list[str] = Field(description="name of days in a week")

    raw_completions = client.chat.completions.with_raw_response
    with patch.object(raw_completions, "create") as create_mock:
        try:
            completion(
                model="azure/gpt4o",
                messages=[{"role": "user", "content": "Hello world"}],
                response_format=ResponseFormat,
                client=client,
            )
        except Exception as e:
            # Best-effort call: only the outgoing request is inspected below,
            # so any failure raised around the patched method is just printed.
            print(e)

        create_mock.assert_called_once()

        sent_kwargs = create_mock.call_args.kwargs
        print(sent_kwargs)

        # NOTE(review): this checks the key "tool_calls"; confirm that is the
        # key the request would carry if response_format had been translated.
        assert "tool_calls" not in sent_kwargs
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue