test: update tests to new deployment model (#10142)

* test: update tests to new deployment model

* test: update model name

* test: skip cohere rbac issue test

* test: update test - replace gpt-4o model
Krish Dholakia 2025-04-18 14:22:12 -07:00 committed by GitHub
parent 415abfc222
commit 1ea046cc61
72 changed files with 294 additions and 292 deletions
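
Most of the diff below is a mechanical rename of the Azure deployments the test suite targets: azure/chatgpt-v-2 becomes azure/chatgpt-v-3, and azure/gpt-4o becomes azure/gpt-4o-new-test. A minimal sketch of what the rename means for a typical test call (call shape taken from the hunks below; only the deployment segment of the model string changes):

import litellm

# before this commit: model="azure/chatgpt-v-2"
response = litellm.completion(
    model="azure/chatgpt-v-3",  # "azure/<deployment>" routes to that Azure OpenAI deployment
    messages=[{"role": "user", "content": "Hello world"}],
)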


@ -14,7 +14,7 @@ from litellm.llms.anthropic.chat import ModelResponseIterator
import httpx
import json
from litellm.llms.custom_httpx.http_handler import HTTPHandler
from base_rerank_unit_tests import BaseLLMRerankTest
# from base_rerank_unit_tests import BaseLLMRerankTest
load_dotenv()
import io
@ -255,16 +255,17 @@ def test_azure_deepseek_reasoning_content():
assert response.choices[0].message.content == "\n\nThe sky is a canvas of blue"
class TestAzureAIRerank(BaseLLMRerankTest):
def get_custom_llm_provider(self) -> litellm.LlmProviders:
return litellm.LlmProviders.AZURE_AI
# skipping due to cohere rbac issues
# class TestAzureAIRerank(BaseLLMRerankTest):
# def get_custom_llm_provider(self) -> litellm.LlmProviders:
# return litellm.LlmProviders.AZURE_AI
def get_base_rerank_call_args(self) -> dict:
return {
"model": "azure_ai/cohere-rerank-v3-english",
"api_base": os.getenv("AZURE_AI_COHERE_API_BASE"),
"api_key": os.getenv("AZURE_AI_COHERE_API_KEY"),
}
# def get_base_rerank_call_args(self) -> dict:
# return {
# "model": "azure_ai/cohere-rerank-v3-english",
# "api_base": os.getenv("AZURE_AI_COHERE_API_BASE"),
# "api_key": os.getenv("AZURE_AI_COHERE_API_KEY"),
# }
@pytest.mark.asyncio
@ -279,7 +280,7 @@ async def test_azure_ai_request_format():
# Set up the test parameters
api_key = os.getenv("AZURE_API_KEY")
api_base = f"{os.getenv('AZURE_API_BASE')}/openai/deployments/gpt-4o/chat/completions?api-version=2024-08-01-preview"
api_base = f"{os.getenv('AZURE_API_BASE')}/openai/deployments/gpt-4o-new-test/chat/completions?api-version=2024-08-01-preview"
model = "azure_ai/gpt-4o"
messages = [
{"role": "user", "content": "hi"},


@ -137,7 +137,7 @@ def test_azure_extra_headers(input, call_type, header_value):
func = image_generation
data = {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_base": "https://openai-gpt-4-test-v-1.openai.azure.com",
"api_version": "2023-07-01-preview",
"api_key": "my-azure-api-key",
@ -339,7 +339,7 @@ def test_azure_gpt_4o_with_tool_call_and_response_format(api_version):
with patch.object(client.chat.completions.with_raw_response, "create") as mock_post:
response = litellm.completion(
model="azure/gpt-4o",
model="azure/gpt-4o-new-test",
messages=[
{
"role": "system",
@ -474,7 +474,7 @@ def test_azure_max_retries_0(
try:
completion(
model="azure/gpt-4o",
model="azure/gpt-4o-new-test",
messages=[{"role": "user", "content": "Hello world"}],
max_retries=max_retries,
stream=stream,
@ -502,7 +502,7 @@ async def test_async_azure_max_retries_0(
try:
await acompletion(
model="azure/gpt-4o",
model="azure/gpt-4o-new-test",
messages=[{"role": "user", "content": "Hello world"}],
max_retries=max_retries,
stream=stream,


@ -217,7 +217,7 @@ def test_openai_optional_params_embeddings():
def test_azure_optional_params_embeddings():
litellm.drop_params = True
optional_params = get_optional_params_embeddings(
model="chatgpt-v-2",
model="chatgpt-v-3",
user="John",
encoding_format=None,
custom_llm_provider="azure",
@ -396,7 +396,7 @@ def test_azure_tool_choice(api_version):
"""
litellm.drop_params = True
optional_params = litellm.utils.get_optional_params(
model="chatgpt-v-2",
model="chatgpt-v-3",
user="John",
custom_llm_provider="azure",
max_tokens=10,


@ -150,6 +150,7 @@ async def test_basic_rerank_together_ai(sync_mode):
@pytest.mark.asyncio()
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.skip(reason="Skipping test due to Cohere RBAC issues")
async def test_basic_rerank_azure_ai(sync_mode):
import os
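
Elsewhere in this diff the TestAzureAIRerank class is disabled by commenting it out; the marker added above handles the same Cohere RBAC issue and also works at class level. A sketch of the marker-based alternative (class and provider names from the earlier rerank hunk; a sketch, not the committed change):

import pytest
import litellm
from base_rerank_unit_tests import BaseLLMRerankTest

@pytest.mark.skip(reason="Skipping test due to Cohere RBAC issues")
class TestAzureAIRerank(BaseLLMRerankTest):
    def get_custom_llm_provider(self) -> litellm.LlmProviders:
        return litellm.LlmProviders.AZURE_AI
    # get_base_rerank_call_args() unchanged from the earlier hunk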


@ -91,7 +91,7 @@ async def make_async_calls(metadata=None, **completion_kwargs):
def create_async_task(**completion_kwargs):
litellm.set_verbose = True
completion_args = {
"model": "openai/chatgpt-v-2",
"model": "openai/chatgpt-v-3",
"api_version": "2024-02-01",
"messages": [{"role": "user", "content": "This is a test"}],
"max_tokens": 5,


@ -86,7 +86,7 @@ def create_async_task(**completion_kwargs):
By default a standard set of arguments are used for the litellm.acompletion function.
"""
completion_args = {
"model": "openai/chatgpt-v-2",
"model": "openai/chatgpt-v-3",
"api_version": "2024-02-01",
"messages": [{"role": "user", "content": "This is a test" * 100}],
"max_tokens": 5,

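create_async_task appears with the same shape in several of these logging tests; a hedged reconstruction of the pattern (the completion_args dict is from the hunk above, while the update/create_task tail is an assumption about the elided body):

import asyncio

import litellm

def create_async_task(**completion_kwargs):
    # default arguments for litellm.acompletion, per the docstring above
    completion_args = {
        "model": "openai/chatgpt-v-3",
        "api_version": "2024-02-01",
        "messages": [{"role": "user", "content": "This is a test" * 100}],
        "max_tokens": 5,
    }
    # assumed: caller-supplied kwargs override the defaults, and the coroutine is
    # wrapped in a task so many completions can run concurrently
    completion_args.update(completion_kwargs)
    return asyncio.create_task(litellm.acompletion(**completion_args))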

@ -1,7 +1,7 @@
model_list:
- model_name: gpt-4-team1
litellm_params:
model: azure/chatgpt-v-2
model: azure/chatgpt-v-3
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
api_version: "2023-05-15"
api_key: os.environ/AZURE_API_KEY


@ -26,7 +26,7 @@ model_list = [
{ # list of model deployments
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -143,7 +143,7 @@ async def test_cooldown_same_model_name(sync_mode):
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -153,7 +153,7 @@ async def test_cooldown_same_model_name(sync_mode):
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -184,7 +184,7 @@ async def test_cooldown_same_model_name(sync_mode):
model_ids.append(model["model_info"]["id"])
print("\n litellm model ids ", model_ids)
# example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
# example litellm_model_names ['azure/chatgpt-v-3-ModelID-64321', 'azure/chatgpt-v-3-ModelID-63960']
assert (
model_ids[0] != model_ids[1]
) # ensure both models have a uuid added, and they have different names
@ -201,7 +201,7 @@ async def test_cooldown_same_model_name(sync_mode):
model_ids.append(model["model_info"]["id"])
print("\n litellm model ids ", model_ids)
# example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
# example litellm_model_names ['azure/chatgpt-v-3-ModelID-64321', 'azure/chatgpt-v-3-ModelID-63960']
assert (
model_ids[0] != model_ids[1]
) # ensure both models have a uuid added, and they have different names


@ -194,7 +194,7 @@ def create_async_task(**completion_kwargs):
By default a standard set of arguments are used for the litellm.acompletion function.
"""
completion_args = {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_version": "2024-02-01",
"messages": [{"role": "user", "content": "This is a test"}],
"max_tokens": 5,


@ -71,7 +71,7 @@ async def test_create_delete_assistants(provider, sync_mode):
model = "gpt-4-turbo"
if provider == "azure":
os.environ["AZURE_API_VERSION"] = "2024-05-01-preview"
model = "chatgpt-v-2"
model = "chatgpt-v-3"
if sync_mode == True:
assistant = litellm.create_assistants(


@ -46,7 +46,7 @@ async def test_aaaaazure_tenant_id_auth(respx_mock: MockRouter):
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_base": os.getenv("AZURE_API_BASE"),
"tenant_id": os.getenv("AZURE_TENANT_ID"),
"client_id": os.getenv("AZURE_CLIENT_ID"),
@ -95,6 +95,6 @@ async def test_aaaaazure_tenant_id_auth(respx_mock: MockRouter):
assert json_body == {
"messages": [{"role": "user", "content": "Hello world!"}],
"model": "chatgpt-v-2",
"model": "chatgpt-v-3",
"stream": False,
}


@ -18,7 +18,7 @@
# {
# "model_name": "azure-test",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "model": "azure/chatgpt-v-3",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_base": os.getenv("AZURE_API_BASE"),
# "api_version": os.getenv("AZURE_API_VERSION"),
@ -33,7 +33,7 @@
# try:
# start_time = time.time()
# response = await client.chat.completions.create(
# model="chatgpt-v-2",
# model="chatgpt-v-3",
# messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
# stream=True,
# )


@ -324,7 +324,7 @@ def test_caching_with_models_v2():
litellm.set_verbose = True
response1 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
response2 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
response3 = completion(model="azure/chatgpt-v-2", messages=messages, caching=True)
response3 = completion(model="azure/chatgpt-v-3", messages=messages, caching=True)
print(f"response1: {response1}")
print(f"response2: {response2}")
print(f"response3: {response3}")
@ -1170,7 +1170,7 @@ async def test_s3_cache_stream_azure(sync_mode):
if sync_mode:
response1 = litellm.completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=messages,
max_tokens=40,
temperature=1,
@ -1183,7 +1183,7 @@ async def test_s3_cache_stream_azure(sync_mode):
print(response_1_content)
else:
response1 = await litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=messages,
max_tokens=40,
temperature=1,
@ -1203,7 +1203,7 @@ async def test_s3_cache_stream_azure(sync_mode):
if sync_mode:
response2 = litellm.completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=messages,
max_tokens=40,
temperature=1,
@ -1216,7 +1216,7 @@ async def test_s3_cache_stream_azure(sync_mode):
print(response_2_content)
else:
response2 = await litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=messages,
max_tokens=40,
temperature=1,
@ -1279,7 +1279,7 @@ async def test_s3_cache_acompletion_azure():
print("s3 Cache: test for caching, streaming + completion")
response1 = await litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=messages,
max_tokens=40,
temperature=1,
@ -1289,7 +1289,7 @@ async def test_s3_cache_acompletion_azure():
time.sleep(2)
response2 = await litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=messages,
max_tokens=40,
temperature=1,


@ -58,7 +58,7 @@ def test_caching_router():
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),


@ -55,7 +55,7 @@
# # {
# # "model_name": "gpt-3.5-turbo", # openai model name
# # "litellm_params": { # params for litellm completion/embedding call
# # "model": "azure/chatgpt-v-2",
# # "model": "azure/chatgpt-v-3",
# # "api_key": os.getenv("AZURE_API_KEY"),
# # "api_version": os.getenv("AZURE_API_VERSION"),
# # "api_base": os.getenv("AZURE_API_BASE"),
@ -93,7 +93,7 @@
# # {
# # "model_name": "gpt-3.5-turbo", # openai model name
# # "litellm_params": { # params for litellm completion/embedding call
# # "model": "azure/chatgpt-v-2",
# # "model": "azure/chatgpt-v-3",
# # "api_key": os.getenv("AZURE_API_KEY"),
# # "api_version": os.getenv("AZURE_API_VERSION"),
# # "api_base": os.getenv("AZURE_API_BASE"),


@ -732,7 +732,7 @@ def encode_image(image_path):
"model",
[
"gpt-4o",
"azure/gpt-4o",
"azure/gpt-4o-new-test",
"anthropic/claude-3-opus-20240229",
],
) #
@ -1824,9 +1824,9 @@ def test_completion_openai():
"model, api_version",
[
# ("gpt-4o-2024-08-06", None),
# ("azure/chatgpt-v-2", None),
# ("azure/chatgpt-v-3", None),
("bedrock/anthropic.claude-3-sonnet-20240229-v1:0", None),
# ("azure/gpt-4o", "2024-08-01-preview"),
# ("azure/gpt-4o-new-test", "2024-08-01-preview"),
],
)
@pytest.mark.flaky(retries=3, delay=1)
@ -2495,7 +2495,7 @@ def test_completion_azure_extra_headers():
litellm.client_session = http_client
try:
response = completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=messages,
api_base=os.getenv("AZURE_API_BASE"),
api_version="2023-07-01-preview",
@ -2544,7 +2544,7 @@ def test_completion_azure_ad_token():
litellm.client_session = http_client
try:
response = completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=messages,
azure_ad_token="my-special-token",
)
@ -2575,7 +2575,7 @@ def test_completion_azure_key_completion_arg():
litellm.set_verbose = True
## Test azure call
response = completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=messages,
api_key=old_key,
logprobs=True,
@ -2633,7 +2633,7 @@ async def test_re_use_azure_async_client():
## Test azure call
for _ in range(3):
response = await litellm.acompletion(
model="azure/chatgpt-v-2", messages=messages, client=client
model="azure/chatgpt-v-3", messages=messages, client=client
)
print(f"response: {response}")
except Exception as e:
@ -2665,7 +2665,7 @@ def test_completion_azure():
litellm.set_verbose = False
## Test azure call
response = completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=messages,
api_key="os.environ/AZURE_API_KEY",
)
@ -2673,7 +2673,7 @@ def test_completion_azure():
print(f"response hidden params: {response._hidden_params}")
## Test azure flag for backwards-compat
# response = completion(
# model="chatgpt-v-2",
# model="chatgpt-v-3",
# messages=messages,
# azure=True,
# max_tokens=10
@ -2712,7 +2712,7 @@ def test_azure_openai_ad_token():
litellm.input_callback = [tester]
try:
response = litellm.completion(
model="azure/chatgpt-v-2", # e.g. gpt-35-instant
model="azure/chatgpt-v-3", # e.g. gpt-35-instant
messages=[
{
"role": "user",
@ -2750,7 +2750,7 @@ def test_completion_azure2():
## Test azure call
response = completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=messages,
api_base=api_base,
api_key=api_key,
@ -2787,7 +2787,7 @@ def test_completion_azure3():
## Test azure call
response = completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=messages,
max_tokens=10,
)
@ -2835,7 +2835,7 @@ def test_completion_azure_with_litellm_key():
openai.api_key = "ymca"
response = completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=messages,
)
# Add any assertions here to check the response
@ -2863,7 +2863,7 @@ def test_completion_azure_deployment_id():
try:
litellm.set_verbose = True
response = completion(
deployment_id="chatgpt-v-2",
deployment_id="chatgpt-v-3",
model="gpt-3.5-turbo",
messages=messages,
)
@ -3925,7 +3925,7 @@ def test_completion_stream_watsonx():
@pytest.mark.parametrize(
"provider, model, project, region_name, token",
[
("azure", "chatgpt-v-2", None, None, "test-token"),
("azure", "chatgpt-v-3", None, None, "test-token"),
("vertex_ai", "anthropic-claude-3", "adroit-crow-1", "us-east1", None),
("watsonx", "ibm/granite", "96946574", "dallas", "1234"),
("bedrock", "anthropic.claude-3", None, "us-east-1", None),
@ -4178,7 +4178,7 @@ async def test_completion_ai21_chat():
@pytest.mark.parametrize(
"model",
["gpt-4o", "azure/chatgpt-v-2"],
["gpt-4o", "azure/chatgpt-v-3"],
)
@pytest.mark.parametrize(
"stream",
@ -4200,7 +4200,7 @@ def test_completion_response_ratelimit_headers(model, stream):
assert "x-ratelimit-remaining-requests" in additional_headers
assert "x-ratelimit-remaining-tokens" in additional_headers
if model == "azure/chatgpt-v-2":
if model == "azure/chatgpt-v-3":
# Azure OpenAI header
assert "llm_provider-azureml-model-session" in additional_headers
if model == "claude-3-sonnet-20240229":


@ -46,7 +46,7 @@ async def test_delete_deployment():
import base64
litellm_params = LiteLLM_Params(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
api_key=os.getenv("AZURE_API_KEY"),
api_base=os.getenv("AZURE_API_BASE"),
api_version=os.getenv("AZURE_API_VERSION"),
@ -232,7 +232,7 @@ async def test_db_error_new_model_check():
litellm_params = LiteLLM_Params(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
api_key=os.getenv("AZURE_API_KEY"),
api_base=os.getenv("AZURE_API_BASE"),
api_version=os.getenv("AZURE_API_VERSION"),
@ -250,7 +250,7 @@ def _create_model_list(flag_value: Literal[0, 1], master_key: str):
import base64
new_litellm_params = LiteLLM_Params(
model="azure/chatgpt-v-2-3",
model="azure/chatgpt-v-3-3",
api_key=os.getenv("AZURE_API_KEY"),
api_base=os.getenv("AZURE_API_BASE"),
api_version=os.getenv("AZURE_API_VERSION"),


@ -5,12 +5,12 @@ model_list:
model: gpt-3.5-turbo
- model_name: working-azure-gpt-3.5-turbo
litellm_params:
model: azure/chatgpt-v-2
model: azure/chatgpt-v-3
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
- model_name: azure-gpt-3.5-turbo
litellm_params:
model: azure/chatgpt-v-2
model: azure/chatgpt-v-3
api_base: os.environ/AZURE_API_BASE
api_key: bad-key
- model_name: azure-embedding


@ -1,7 +1,7 @@
model_list:
- model_name: azure-cloudflare
litellm_params:
model: azure/chatgpt-v-2
model: azure/chatgpt-v-3
api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
api_key: os.environ/AZURE_API_KEY
api_version: 2023-07-01-preview


@ -12,7 +12,7 @@ model_list:
- litellm_params:
api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
api_key: os.environ/AZURE_API_KEY
model: azure/chatgpt-v-2
model: azure/chatgpt-v-3
model_name: azure-cloudflare-model
- litellm_params:
api_base: https://openai-france-1234.openai.azure.com


@ -1,7 +1,7 @@
model_list:
- model_name: Azure OpenAI GPT-4 Canada
litellm_params:
model: azure/chatgpt-v-2
model: azure/chatgpt-v-3
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
api_version: "2023-07-01-preview"


@ -450,12 +450,12 @@ def test_chat_azure_stream():
customHandler = CompletionCustomHandler()
litellm.callbacks = [customHandler]
response = litellm.completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}],
)
# test streaming
response = litellm.completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}],
stream=True,
)
@ -464,7 +464,7 @@ def test_chat_azure_stream():
# test failure callback
try:
response = litellm.completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}],
api_key="my-bad-key",
stream=True,
@ -491,12 +491,12 @@ async def test_async_chat_azure_stream():
customHandler = CompletionCustomHandler()
litellm.callbacks = [customHandler]
response = await litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[{"role": "user", "content": "Hi 👋 - i'm async azure"}],
)
## test streaming
response = await litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[{"role": "user", "content": "Hi 👋 - i'm async azure"}],
stream=True,
)
@ -507,7 +507,7 @@ async def test_async_chat_azure_stream():
# test failure callback
try:
response = await litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[{"role": "user", "content": "Hi 👋 - i'm async azure"}],
api_key="my-bad-key",
stream=True,
@ -1018,7 +1018,7 @@ async def test_async_completion_azure_caching():
litellm.callbacks = [customHandler_caching]
unique_time = time.time()
response1 = await litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[
{"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
],
@ -1027,7 +1027,7 @@ async def test_async_completion_azure_caching():
await asyncio.sleep(1)
print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}")
response2 = await litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[
{"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
],
@ -1056,7 +1056,7 @@ async def test_async_completion_azure_caching_streaming():
litellm.callbacks = [customHandler_caching]
unique_time = uuid.uuid4()
response1 = await litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[
{"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
],
@ -1069,7 +1069,7 @@ async def test_async_completion_azure_caching_streaming():
initial_customhandler_caching_states = len(customHandler_caching.states)
print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}")
response2 = await litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[
{"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
],
@ -1207,7 +1207,7 @@ def test_turn_off_message_logging():
"model",
[
"ft:gpt-3.5-turbo:my-org:custom_suffix:id"
], # "gpt-3.5-turbo", "azure/chatgpt-v-2",
], # "gpt-3.5-turbo", "azure/chatgpt-v-3",
)
@pytest.mark.parametrize(
"turn_off_message_logging",


@ -284,7 +284,7 @@ class CompletionCustomHandler(
)
if (
kwargs["model"] == "chatgpt-v-2"
kwargs["model"] == "chatgpt-v-3"
and base_model is not None
and kwargs["stream"] != True
):
@ -394,7 +394,7 @@ async def test_async_chat_azure():
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -438,7 +438,7 @@ async def test_async_chat_azure():
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "my-bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -545,7 +545,7 @@ async def test_async_chat_azure_with_fallbacks():
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "my-bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -606,7 +606,7 @@ async def test_async_completion_azure_caching():
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),


@ -160,7 +160,7 @@ def test_completion_azure_stream_moderation_failure():
]
try:
response = completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=messages,
mock_response="Exception: content_filter_policy",
stream=True,
@ -195,7 +195,7 @@ def test_async_custom_handler_stream():
async def test_1():
nonlocal complete_streaming_response
response = await litellm.acompletion(
model="azure/chatgpt-v-2", messages=messages, stream=True
model="azure/chatgpt-v-3", messages=messages, stream=True
)
async for chunk in response:
complete_streaming_response += (
@ -239,7 +239,7 @@ def test_azure_completion_stream():
complete_streaming_response = ""
response = litellm.completion(
model="azure/chatgpt-v-2", messages=messages, stream=True
model="azure/chatgpt-v-3", messages=messages, stream=True
)
for chunk in response:
complete_streaming_response += chunk["choices"][0]["delta"]["content"] or ""


@ -51,7 +51,7 @@ async def test_content_policy_exception_azure():
# this is only a test - we needed some way to invoke the exception :(
litellm.set_verbose = True
response = await litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[{"role": "user", "content": "where do I buy lethal drugs from"}],
mock_response="Exception: content_filter_policy",
)
@ -124,7 +124,7 @@ def test_context_window_with_fallbacks(model):
ctx_window_fallback_dict = {
"command-nightly": "claude-2.1",
"gpt-3.5-turbo-instruct": "gpt-3.5-turbo-16k",
"azure/chatgpt-v-2": "gpt-3.5-turbo-16k",
"azure/chatgpt-v-3": "gpt-3.5-turbo-16k",
}
sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
@ -161,7 +161,7 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th
os.environ["AWS_REGION_NAME"] = "bad-key"
temporary_secret_key = os.environ["AWS_SECRET_ACCESS_KEY"]
os.environ["AWS_SECRET_ACCESS_KEY"] = "bad-key"
elif model == "azure/chatgpt-v-2":
elif model == "azure/chatgpt-v-3":
temporary_key = os.environ["AZURE_API_KEY"]
os.environ["AZURE_API_KEY"] = "bad-key"
elif model == "claude-3-5-haiku-20241022":
@ -262,7 +262,7 @@ def test_completion_azure_exception():
old_azure_key = os.environ["AZURE_API_KEY"]
os.environ["AZURE_API_KEY"] = "good morning"
response = completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[{"role": "user", "content": "hello"}],
)
os.environ["AZURE_API_KEY"] = old_azure_key
@ -309,7 +309,7 @@ async def asynctest_completion_azure_exception():
old_azure_key = os.environ["AZURE_API_KEY"]
os.environ["AZURE_API_KEY"] = "good morning"
response = await litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[{"role": "user", "content": "hello"}],
)
print(f"response: {response}")
@ -528,7 +528,7 @@ def test_content_policy_violation_error_streaming():
async def test_get_response():
try:
response = await litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[{"role": "user", "content": "say 1"}],
temperature=0,
top_p=1,
@ -557,7 +557,7 @@ def test_content_policy_violation_error_streaming():
async def test_get_error():
try:
response = await litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[
{"role": "user", "content": "where do i buy lethal drugs from"}
],
@ -754,7 +754,7 @@ def test_litellm_predibase_exception():
# return False
# # Repeat each model 500 times
# # extended_models = [model for model in models for _ in range(250)]
# extended_models = ["azure/chatgpt-v-2" for _ in range(250)]
# extended_models = ["azure/chatgpt-v-3" for _ in range(250)]
# def worker(model):
# return test_model_call(model)
@ -934,7 +934,7 @@ def _pre_call_utils_httpx(
("openai", "gpt-3.5-turbo", "chat_completion", False),
("openai", "gpt-3.5-turbo", "chat_completion", True),
("openai", "gpt-3.5-turbo-instruct", "completion", True),
("azure", "azure/chatgpt-v-2", "chat_completion", True),
("azure", "azure/chatgpt-v-3", "chat_completion", True),
("azure", "azure/text-embedding-ada-002", "embedding", True),
("azure", "azure_text/gpt-3.5-turbo-instruct", "completion", True),
],
@ -1158,7 +1158,7 @@ async def test_exception_with_headers_httpx(
@pytest.mark.asyncio
@pytest.mark.parametrize("model", ["azure/chatgpt-v-2", "openai/gpt-3.5-turbo"])
@pytest.mark.parametrize("model", ["azure/chatgpt-v-3", "openai/gpt-3.5-turbo"])
async def test_bad_request_error_contains_httpx_response(model):
"""
Test that the BadRequestError contains the httpx response
@ -1209,7 +1209,7 @@ def test_context_window_exceeded_error_from_litellm_proxy():
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.parametrize("stream_mode", [True, False])
@pytest.mark.parametrize("model", ["azure/gpt-4o"]) # "gpt-4o-mini",
@pytest.mark.parametrize("model", ["azure/gpt-4o-new-test"]) # "gpt-4o-mini",
@pytest.mark.asyncio
async def test_exception_bubbling_up(sync_mode, stream_mode, model):
"""


@ -108,14 +108,14 @@ async def test_aaabasic_gcs_logger():
},
"endpoint": "http://localhost:4000/chat/completions",
"model_group": "gpt-3.5-turbo",
"deployment": "azure/chatgpt-v-2",
"deployment": "azure/chatgpt-v-3",
"model_info": {
"id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
"db_model": False,
},
"api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
"caching_groups": None,
"raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
"raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
},
)
@ -216,14 +216,14 @@ async def test_basic_gcs_logger_failure():
},
"endpoint": "http://localhost:4000/chat/completions",
"model_group": "gpt-3.5-turbo",
"deployment": "azure/chatgpt-v-2",
"deployment": "azure/chatgpt-v-3",
"model_info": {
"id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
"db_model": False,
},
"api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
"caching_groups": None,
"raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
"raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
},
)
except Exception:
@ -626,14 +626,14 @@ async def test_basic_gcs_logger_with_folder_in_bucket_name():
},
"endpoint": "http://localhost:4000/chat/completions",
"model_group": "gpt-3.5-turbo",
"deployment": "azure/chatgpt-v-2",
"deployment": "azure/chatgpt-v-3",
"model_info": {
"id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
"db_model": False,
},
"api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
"caching_groups": None,
"raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
"raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
},
)


@ -20,7 +20,7 @@ import litellm
async def test_azure_health_check():
response = await litellm.ahealth_check(
model_params={
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"messages": [{"role": "user", "content": "Hey, how's it going?"}],
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),


@ -78,7 +78,7 @@ async def make_async_calls(metadata=None, **completion_kwargs):
def create_async_task(**completion_kwargs):
completion_args = {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_version": "2024-02-01",
"messages": [{"role": "user", "content": "This is a test"}],
"max_tokens": 5,


@ -33,7 +33,7 @@ def test_model_added():
"litellm_params": {
"metadata": {
"model_group": "gpt-3.5-turbo",
"deployment": "azure/chatgpt-v-2",
"deployment": "azure/chatgpt-v-3",
},
"model_info": {"id": "1234"},
}
@ -47,7 +47,7 @@ def test_get_available_deployments():
test_cache = DualCache()
least_busy_logger = LeastBusyLoggingHandler(router_cache=test_cache, model_list=[])
model_group = "gpt-3.5-turbo"
deployment = "azure/chatgpt-v-2"
deployment = "azure/chatgpt-v-3"
kwargs = {
"litellm_params": {
"metadata": {
@ -113,7 +113,7 @@ async def test_router_get_available_deployments(async_test):
router.leastbusy_logger.test_flag = True
model_group = "azure-model"
deployment = "azure/chatgpt-v-2"
deployment = "azure/chatgpt-v-3"
request_count_dict = {1: 10, 2: 54, 3: 100}
cache_key = f"{model_group}_request_count"
if async_test is True:


@ -46,7 +46,7 @@
# {
# "model_name": "gpt-3.5-turbo",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "model": "azure/chatgpt-v-3",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_base": os.getenv("AZURE_API_BASE"),
# "api_version": os.getenv("AZURE_API_VERSION"),


@ -38,7 +38,7 @@
# {
# "model_name": "gpt-3.5-turbo",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "model": "azure/chatgpt-v-3",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_base": os.getenv("AZURE_API_BASE"),
# "api_version": os.getenv("AZURE_API_VERSION"),


@ -60,7 +60,7 @@ async def test_get_available_deployments_custom_price():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00003,
},


@ -48,7 +48,7 @@ async def test_latency_memory_leak(sync_mode):
"litellm_params": {
"metadata": {
"model_group": "gpt-3.5-turbo",
"deployment": "azure/chatgpt-v-2",
"deployment": "azure/chatgpt-v-3",
},
"model_info": {"id": deployment_id},
}
@ -130,7 +130,7 @@ def test_latency_updated():
"litellm_params": {
"metadata": {
"model_group": "gpt-3.5-turbo",
"deployment": "azure/chatgpt-v-2",
"deployment": "azure/chatgpt-v-3",
},
"model_info": {"id": deployment_id},
}
@ -173,7 +173,7 @@ def test_latency_updated_custom_ttl():
"litellm_params": {
"metadata": {
"model_group": "gpt-3.5-turbo",
"deployment": "azure/chatgpt-v-2",
"deployment": "azure/chatgpt-v-3",
},
"model_info": {"id": deployment_id},
}
@ -200,12 +200,12 @@ def test_get_available_deployments():
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {"model": "azure/chatgpt-v-2"},
"litellm_params": {"model": "azure/chatgpt-v-3"},
"model_info": {"id": "1234"},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {"model": "azure/chatgpt-v-2"},
"litellm_params": {"model": "azure/chatgpt-v-3"},
"model_info": {"id": "5678"},
},
]
@ -219,7 +219,7 @@ def test_get_available_deployments():
"litellm_params": {
"metadata": {
"model_group": "gpt-3.5-turbo",
"deployment": "azure/chatgpt-v-2",
"deployment": "azure/chatgpt-v-3",
},
"model_info": {"id": deployment_id},
}
@ -240,7 +240,7 @@ def test_get_available_deployments():
"litellm_params": {
"metadata": {
"model_group": "gpt-3.5-turbo",
"deployment": "azure/chatgpt-v-2",
"deployment": "azure/chatgpt-v-3",
},
"model_info": {"id": deployment_id},
}
@ -275,7 +275,7 @@ async def _deploy(lowest_latency_logger, deployment_id, tokens_used, duration):
"litellm_params": {
"metadata": {
"model_group": "gpt-3.5-turbo",
"deployment": "azure/chatgpt-v-2",
"deployment": "azure/chatgpt-v-3",
},
"model_info": {"id": deployment_id},
}
@ -317,12 +317,12 @@ def test_get_available_endpoints_tpm_rpm_check_async(ans_rpm):
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {"model": "azure/chatgpt-v-2"},
"litellm_params": {"model": "azure/chatgpt-v-3"},
"model_info": {"id": "1234", "rpm": ans_rpm},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {"model": "azure/chatgpt-v-2"},
"litellm_params": {"model": "azure/chatgpt-v-3"},
"model_info": {"id": "5678", "rpm": non_ans_rpm},
},
]
@ -366,12 +366,12 @@ def test_get_available_endpoints_tpm_rpm_check(ans_rpm):
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {"model": "azure/chatgpt-v-2"},
"litellm_params": {"model": "azure/chatgpt-v-3"},
"model_info": {"id": "1234", "rpm": ans_rpm},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {"model": "azure/chatgpt-v-2"},
"litellm_params": {"model": "azure/chatgpt-v-3"},
"model_info": {"id": "5678", "rpm": non_ans_rpm},
},
]
@ -385,7 +385,7 @@ def test_get_available_endpoints_tpm_rpm_check(ans_rpm):
"litellm_params": {
"metadata": {
"model_group": "gpt-3.5-turbo",
"deployment": "azure/chatgpt-v-2",
"deployment": "azure/chatgpt-v-3",
},
"model_info": {"id": deployment_id},
}
@ -407,7 +407,7 @@ def test_get_available_endpoints_tpm_rpm_check(ans_rpm):
"litellm_params": {
"metadata": {
"model_group": "gpt-3.5-turbo",
"deployment": "azure/chatgpt-v-2",
"deployment": "azure/chatgpt-v-3",
},
"model_info": {"id": deployment_id},
}


@ -29,7 +29,7 @@
# {
# "model_name": "gpt-3.5-turbo", # openai model name
# "litellm_params": { # params for litellm completion/embedding call
# "model": "azure/chatgpt-v-2",
# "model": "azure/chatgpt-v-3",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_version": os.getenv("AZURE_API_VERSION"),
# "api_base": os.getenv("AZURE_API_BASE"),
@ -40,7 +40,7 @@
# {
# "model_name": "bad-model", # openai model name
# "litellm_params": { # params for litellm completion/embedding call
# "model": "azure/chatgpt-v-2",
# "model": "azure/chatgpt-v-3",
# "api_key": "bad-key",
# "api_version": os.getenv("AZURE_API_VERSION"),
# "api_base": os.getenv("AZURE_API_BASE"),


@ -157,7 +157,7 @@ def test_router_mock_request_with_mock_timeout_with_fallbacks():
{
"model_name": "azure-gpt",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
},


@ -104,12 +104,12 @@ async def test_router_with_caching():
model_list = [
{
"model_name": "azure/gpt-4",
"litellm_params": get_azure_params("chatgpt-v-2"),
"litellm_params": get_azure_params("chatgpt-v-3"),
"tpm": 100,
},
{
"model_name": "azure/gpt-4",
"litellm_params": get_azure_params("chatgpt-v-2"),
"litellm_params": get_azure_params("chatgpt-v-3"),
"tpm": 1000,
},
]
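
get_azure_params is a local helper in these router tests; a hypothetical reconstruction, consistent with the litellm_params dicts used throughout this diff (the real helper lives in the test module):

import os

def get_azure_params(deployment_name: str) -> dict:
    # hypothetical: build litellm_params for a named Azure deployment
    return {
        "model": f"azure/{deployment_name}",
        "api_key": os.getenv("AZURE_API_KEY"),
        "api_version": os.getenv("AZURE_API_VERSION"),
        "api_base": os.getenv("AZURE_API_BASE"),
    }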


@ -107,7 +107,7 @@ async def test_prompt_injection_llm_eval():
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),


@ -729,7 +729,7 @@ def azure_openai_test_completion():
try:
# OVERRIDE WITH DYNAMIC MAX TOKENS
response_1 = litellm.completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[
{
"content": "Hello, how are you? Be as verbose as possible",
@ -743,7 +743,7 @@ def azure_openai_test_completion():
# USE CONFIG TOKENS
response_2 = litellm.completion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[
{
"content": "Hello, how are you? Be as verbose as possible",


@ -266,7 +266,7 @@ def test_router_sensitive_keys():
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "special-key",
},
"model_info": {"id": 12345},
@ -334,7 +334,7 @@ async def test_router_retries(sync_mode):
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
@ -417,7 +417,7 @@ def test_exception_raising():
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -479,7 +479,7 @@ def test_reading_key_from_model_list():
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": old_api_key,
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -535,7 +535,7 @@ def test_reading_key_from_model_list():
def test_call_one_endpoint():
# [PROD TEST CASE]
# user passes one deployment they want to call on the router, we call the specified one
# this test makes a completion call to azure/chatgpt-v-2; it should work
# this test makes a completion call to azure/chatgpt-v-3; it should work
try:
print("Testing calling a specific deployment")
old_api_key = os.environ["AZURE_API_KEY"]
@ -544,7 +544,7 @@ def test_call_one_endpoint():
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": old_api_key,
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -574,7 +574,7 @@ def test_call_one_endpoint():
async def call_azure_completion():
response = await router.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[{"role": "user", "content": "hello this request will pass"}],
specific_deployment=True,
)
@ -620,7 +620,7 @@ def test_router_azure_acompletion():
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": old_api_key,
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -793,7 +793,7 @@ def test_router_context_window_check_pre_call_check_in_group_custom_model_info()
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -847,7 +847,7 @@ def test_router_context_window_check_pre_call_check():
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -901,7 +901,7 @@ def test_router_context_window_check_pre_call_check_out_group():
{
"model_name": "gpt-3.5-turbo-small", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -980,7 +980,7 @@ def test_router_region_pre_call_check(allowed_model_region):
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -2616,7 +2616,7 @@ def test_is_team_specific_model():
# {
# "model_name": "gpt-3.5-turbo",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "model": "azure/chatgpt-v-3",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_base": os.getenv("AZURE_API_BASE"),
# "tpm": 100000,
@ -2626,7 +2626,7 @@ def test_is_team_specific_model():
# {
# "model_name": "gpt-3.5-turbo",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "model": "azure/chatgpt-v-3",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_base": os.getenv("AZURE_API_BASE"),
# "tpm": 500,


@ -74,7 +74,7 @@ async def test_provider_budgets_e2e_test():
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -268,7 +268,7 @@ async def test_prometheus_metric_tracking():
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),


@ -96,7 +96,7 @@ async def test_acompletion_caching_on_router():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
@ -213,7 +213,7 @@ async def test_acompletion_caching_with_ttl_on_router():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
@ -279,7 +279,7 @@ async def test_acompletion_caching_on_router_caching_groups():
{
"model_name": "azure-gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),


@ -43,7 +43,7 @@ async def test_router_init():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),


@ -41,7 +41,7 @@ async def test_cooldown_badrequest_error():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),


@ -33,7 +33,7 @@ def test_async_fallbacks(caplog):
{
"model_name": "azure/gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -93,7 +93,7 @@ def test_async_fallbacks(caplog):
# - error request, falling back notice, success notice
expected_logs = [
"Falling back to model_group = azure/gpt-3.5-turbo",
"litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",
"litellm.acompletion(model=azure/chatgpt-v-3)\x1b[32m 200 OK\x1b[0m",
"Successful fallback b/w models.",
]


@ -67,7 +67,7 @@ def test_sync_fallbacks():
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -78,7 +78,7 @@ def test_sync_fallbacks():
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -150,7 +150,7 @@ async def test_async_fallbacks():
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -161,7 +161,7 @@ async def test_async_fallbacks():
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -349,7 +349,7 @@ def test_dynamic_fallbacks_sync():
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -360,7 +360,7 @@ def test_dynamic_fallbacks_sync():
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -426,7 +426,7 @@ async def test_dynamic_fallbacks_async():
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -437,7 +437,7 @@ async def test_dynamic_fallbacks_async():
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -509,7 +509,7 @@ async def test_async_fallbacks_streaming():
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -520,7 +520,7 @@ async def test_async_fallbacks_streaming():
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -594,7 +594,7 @@ def test_sync_fallbacks_streaming():
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -605,7 +605,7 @@ def test_sync_fallbacks_streaming():
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -675,7 +675,7 @@ async def test_async_fallbacks_max_retries_per_request():
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -686,7 +686,7 @@ async def test_async_fallbacks_max_retries_per_request():
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -808,13 +808,13 @@ def test_ausage_based_routing_fallbacks():
model_list = [
{
"model_name": "azure/gpt-4-fast",
"litellm_params": get_azure_params("chatgpt-v-2"),
"litellm_params": get_azure_params("chatgpt-v-3"),
"model_info": {"id": 1},
"rpm": AZURE_FAST_RPM,
},
{
"model_name": "azure/gpt-4-basic",
"litellm_params": get_azure_params("chatgpt-v-2"),
"litellm_params": get_azure_params("chatgpt-v-3"),
"model_info": {"id": 2},
"rpm": AZURE_BASIC_RPM,
},
@ -889,7 +889,7 @@ def test_custom_cooldown_times():
{ # list of model deployments
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -899,7 +899,7 @@ def test_custom_cooldown_times():
{ # list of model deployments
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@ -993,7 +993,7 @@ async def test_service_unavailable_fallbacks(sync_mode):
{
"model_name": "gpt-3.5-turbo-0125-preview",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),


@ -41,7 +41,7 @@ def test_weighted_selection_router():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
@ -54,7 +54,7 @@ def test_weighted_selection_router():
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
for _ in range(1000):
selected_model = router.get_available_deployment("gpt-3.5-turbo")
selected_model_id = selected_model["litellm_params"]["model"]
@ -64,10 +64,10 @@ def test_weighted_selection_router():
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
# Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
assert (
selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
router.reset()
except Exception as e:
@ -97,7 +97,7 @@ def test_weighted_selection_router_tpm():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
@ -110,7 +110,7 @@ def test_weighted_selection_router_tpm():
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
for _ in range(1000):
selected_model = router.get_available_deployment("gpt-3.5-turbo")
selected_model_id = selected_model["litellm_params"]["model"]
@ -120,10 +120,10 @@ def test_weighted_selection_router_tpm():
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
# Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
assert (
selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
router.reset()
except Exception as e:
@@ -153,7 +153,7 @@ def test_weighted_selection_router_tpm_as_router_param():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
@@ -166,7 +166,7 @@ def test_weighted_selection_router_tpm_as_router_param():
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
for _ in range(1000):
selected_model = router.get_available_deployment("gpt-3.5-turbo")
selected_model_id = selected_model["litellm_params"]["model"]
@@ -176,10 +176,10 @@ def test_weighted_selection_router_tpm_as_router_param():
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
# Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
assert (
selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
router.reset()
except Exception as e:
@@ -210,7 +210,7 @@ def test_weighted_selection_router_rpm_as_router_param():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
@@ -224,7 +224,7 @@ def test_weighted_selection_router_rpm_as_router_param():
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
for _ in range(1000):
selected_model = router.get_available_deployment("gpt-3.5-turbo")
selected_model_id = selected_model["litellm_params"]["model"]
@@ -234,10 +234,10 @@ def test_weighted_selection_router_rpm_as_router_param():
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
# Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
assert (
selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
router.reset()
except Exception as e:
@@ -266,7 +266,7 @@ def test_weighted_selection_router_no_rpm_set():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
@@ -286,7 +286,7 @@ def test_weighted_selection_router_no_rpm_set():
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
for _ in range(1000):
selected_model = router.get_available_deployment("claude-1")
selected_model_id = selected_model["litellm_params"]["model"]
@@ -296,7 +296,7 @@ def test_weighted_selection_router_no_rpm_set():
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
# Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
assert (
selection_counts["bedrock/claude1.2"] / total_requests == 1
), f"Assertion failed: Selection counts {selection_counts}"
@@ -325,7 +325,7 @@ def test_model_group_aliases():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
@@ -358,7 +358,7 @@ def test_model_group_aliases():
)
# test that
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
selection_counts = defaultdict(int)
for _ in range(1000):
selected_model = router.get_available_deployment("gpt-3.5-turbo")
@@ -369,10 +369,10 @@ def test_model_group_aliases():
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
# Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
assert (
selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
router.reset()
except Exception as e:
@@ -552,7 +552,7 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
@@ -566,7 +566,7 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
for _ in range(1000):
selected_model = await router.async_get_available_deployment(
"gpt-3.5-turbo", request_kwargs={}
@@ -579,13 +579,13 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
total_requests = sum(selection_counts.values())
if rpm_list[0] is not None or tpm_list[0] is not None:
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
# Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
assert (
selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
else:
# Assert both are used
assert selection_counts["azure/chatgpt-v-2"] > 0
assert selection_counts["azure/chatgpt-v-3"] > 0
assert selection_counts["gpt-3.5-turbo"] > 0
router.reset()
except Exception as e:
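
A condensed sketch of the weighted-selection pattern asserted throughout this file; the rpm values are illustrative, and `get_available_deployment` only selects a deployment (no API call is made):

from collections import defaultdict
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "azure/chatgpt-v-3", "rpm": 9},
        },
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo", "rpm": 1},
        },
    ],
    routing_strategy="simple-shuffle",  # default strategy; honors rpm/tpm weights
)

selection_counts = defaultdict(int)
for _ in range(1000):
    deployment = router.get_available_deployment("gpt-3.5-turbo")
    selection_counts[deployment["litellm_params"]["model"]] += 1

# Expect roughly a 9:1 split in favor of azure/chatgpt-v-3.
print(selection_counts)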

View file

@@ -40,7 +40,7 @@
# {
# "model_name": "gpt-3.5-turbo",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "model": "azure/chatgpt-v-3",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_version": os.getenv("AZURE_API_VERSION"),
# "api_base": os.getenv("AZURE_API_BASE"),
@@ -96,7 +96,7 @@
# {
# "model_name": "gpt-3.5-turbo",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "model": "azure/chatgpt-v-3",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_version": os.getenv("AZURE_API_VERSION"),
# "api_base": os.getenv("AZURE_API_BASE"),
@@ -134,7 +134,7 @@
# {
# "model_name": "azure-cloudflare",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "model": "azure/chatgpt-v-3",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_version": os.getenv("AZURE_API_VERSION"),
# "api_base": "https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1",
@@ -201,7 +201,7 @@
# {
# "model_name": "gpt-3.5-turbo",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "model": "azure/chatgpt-v-3",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_version": os.getenv("AZURE_API_VERSION"),
# "api_base": os.getenv("AZURE_API_BASE"),
@@ -254,7 +254,7 @@
# {
# "model_name": "gpt-3.5-turbo",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "model": "azure/chatgpt-v-3",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_version": os.getenv("AZURE_API_VERSION"),
# "api_base": os.getenv("AZURE_API_BASE"),
@@ -615,7 +615,7 @@
# {
# "model_name": "gpt-3.5-turbo",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "model": "azure/chatgpt-v-3",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_version": os.getenv("AZURE_API_VERSION"),
# "api_base": os.getenv("AZURE_API_BASE"),
@@ -660,7 +660,7 @@
# {
# "model_name": "gpt-3.5-turbo",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "model": "azure/chatgpt-v-3",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_version": os.getenv("AZURE_API_VERSION"),
# "api_base": os.getenv("AZURE_API_BASE"),

View file

@@ -69,7 +69,7 @@ async def test_async_fallbacks():
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),

View file

@@ -166,7 +166,7 @@ async def test_router_retry_policy(error_type):
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@@ -175,7 +175,7 @@ async def test_router_retry_policy(error_type):
{
"model_name": "bad-model", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@@ -275,7 +275,7 @@ async def test_dynamic_router_retry_policy(model_group):
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@@ -287,7 +287,7 @@ async def test_dynamic_router_retry_policy(model_group):
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@@ -299,7 +299,7 @@ async def test_dynamic_router_retry_policy(model_group):
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@@ -311,7 +311,7 @@ async def test_dynamic_router_retry_policy(model_group):
{
"model_name": "bad-model", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@@ -393,7 +393,7 @@ def test_retry_rate_limit_error_with_healthy_deployments():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@@ -426,7 +426,7 @@ def test_do_retry_rate_limit_error_with_no_fallbacks_and_no_healthy_deployments(
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@@ -459,14 +459,14 @@ def test_raise_context_window_exceeded_error():
llm_provider="azure",
model="gpt-3.5-turbo",
)
context_window_fallbacks = [{"gpt-3.5-turbo": ["azure/chatgpt-v-2"]}]
context_window_fallbacks = [{"gpt-3.5-turbo": ["azure/chatgpt-v-3"]}]
router = Router(
model_list=[
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@@ -508,7 +508,7 @@ def test_raise_context_window_exceeded_error_no_retry():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@@ -562,7 +562,7 @@ def test_timeout_for_rate_limit_error_with_healthy_deployments(
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@@ -589,7 +589,7 @@ def test_timeout_for_rate_limit_error_with_healthy_deployments(
"litellm_params": {
"api_key": "my-key",
"api_base": "https://openai-gpt-4-test-v-1.openai.azure.com",
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
},
"model_info": {
"id": "0e30bc8a63fa91ae4415d4234e231b3f9e6dd900cac57d118ce13a720d95e9d6",
@@ -615,7 +615,7 @@ def test_timeout_for_rate_limit_error_with_no_healthy_deployments():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@@ -650,7 +650,7 @@ def test_no_retry_for_not_found_error_404():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@@ -709,7 +709,7 @@ def test_no_retry_when_no_healthy_deployments():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),

View file

@@ -30,7 +30,7 @@ def test_router_timeouts():
{
"model_name": "openai-gpt-4",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "os.environ/AZURE_API_KEY",
"api_base": "os.environ/AZURE_API_BASE",
"api_version": "os.environ/AZURE_API_VERSION",

View file

@@ -32,7 +32,7 @@ def test_returned_settings():
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@@ -96,7 +96,7 @@ def test_update_kwargs_before_fallbacks_unit_test():
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
@@ -133,7 +133,7 @@ async def test_update_kwargs_before_fallbacks(call_type):
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),

View file

@@ -241,7 +241,7 @@ tools_schema = [
def test_completion_azure_stream_special_char():
litellm.set_verbose = True
messages = [{"role": "user", "content": "hi. respond with the <xml> tag only"}]
response = completion(model="azure/chatgpt-v-2", messages=messages, stream=True)
response = completion(model="azure/chatgpt-v-3", messages=messages, stream=True)
response_str = ""
for part in response:
response_str += part.choices[0].delta.content or ""
@@ -449,7 +449,7 @@ def test_completion_azure_stream():
},
]
response = completion(
model="azure/chatgpt-v-2", messages=messages, stream=True, max_tokens=50
model="azure/chatgpt-v-3", messages=messages, stream=True, max_tokens=50
)
complete_response = ""
# Add any assertions here to check the response
@@ -2070,7 +2070,7 @@ def test_openai_chat_completion_complete_response_call():
"model",
[
"gpt-3.5-turbo",
"azure/chatgpt-v-2",
"azure/chatgpt-v-3",
"claude-3-haiku-20240307",
"o1-preview",
"o1",

View file

@@ -23,7 +23,7 @@ import litellm
[
("gpt-3.5-turbo", "openai"),
("anthropic.claude-instant-v1", "bedrock"),
("azure/chatgpt-v-2", "azure"),
("azure/chatgpt-v-3", "azure"),
],
)
@pytest.mark.parametrize("sync_mode", [True, False])
@@ -104,7 +104,7 @@ def test_hanging_request_azure():
{
"model_name": "azure-gpt",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_base": os.environ["AZURE_API_BASE"],
"api_key": os.environ["AZURE_API_KEY"],
},
@@ -158,7 +158,7 @@ def test_hanging_request_openai():
{
"model_name": "azure-gpt",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_base": os.environ["AZURE_API_BASE"],
"api_key": os.environ["AZURE_API_KEY"],
},

View file

@@ -45,7 +45,7 @@ def test_tpm_rpm_updated():
)
model_group = "gpt-3.5-turbo"
deployment_id = "1234"
deployment = "azure/chatgpt-v-2"
deployment = "azure/chatgpt-v-3"
total_tokens = 50
standard_logging_payload: StandardLoggingPayload = create_standard_logging_payload()
standard_logging_payload["model_group"] = model_group
@@ -100,12 +100,12 @@ def test_get_available_deployments():
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {"model": "azure/chatgpt-v-2"},
"litellm_params": {"model": "azure/chatgpt-v-3"},
"model_info": {"id": "1234"},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {"model": "azure/chatgpt-v-2"},
"litellm_params": {"model": "azure/chatgpt-v-3"},
"model_info": {"id": "5678"},
},
]
@@ -116,7 +116,7 @@ def test_get_available_deployments():
## DEPLOYMENT 1 ##
total_tokens = 50
deployment_id = "1234"
deployment = "azure/chatgpt-v-2"
deployment = "azure/chatgpt-v-3"
standard_logging_payload = create_standard_logging_payload()
standard_logging_payload["model_group"] = model_group
standard_logging_payload["model_id"] = deployment_id
@@ -721,7 +721,7 @@ async def test_tpm_rpm_routing_model_name_checks():
deployment = {
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"mock_response": "Hey, how's it going?",
@@ -763,5 +763,5 @@ async def test_tpm_rpm_routing_model_name_checks():
assert (
standard_logging_payload["hidden_params"]["litellm_model_name"]
== "azure/chatgpt-v-2"
== "azure/chatgpt-v-3"
)
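
A hedged sketch of the usage-based routing these checks cover: the router keys TPM/RPM usage on the deployment id from `model_info`, so two deployments of the same underlying model are tracked separately (strategy name assumed from litellm's docs):

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "azure/chatgpt-v-3", "mock_response": "hi"},
            "model_info": {"id": "1234"},
        },
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "azure/chatgpt-v-3", "mock_response": "hi"},
            "model_info": {"id": "5678"},
        },
    ],
    routing_strategy="usage-based-routing-v2",  # picks the lowest-usage deployment
)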

View file

@@ -56,7 +56,7 @@ def test_get_api_base_unit_test(model, optional_params, expected_api_base):
async def test_get_api_base():
_pl = ProxyLogging(user_api_key_cache=DualCache())
_pl.update_values(alerting=["slack"], alerting_threshold=100, redis_cache=None)
model = "chatgpt-v-2"
model = "chatgpt-v-3"
messages = [{"role": "user", "content": "Hey how's it going?"}]
litellm_params = {
"acompletion": True,

View file

@@ -244,7 +244,7 @@ async def make_async_calls():
for _ in range(5):
task = asyncio.create_task(
litellm.acompletion(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[{"role": "user", "content": "This is a test"}],
max_tokens=5,
temperature=0.7,
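
The helper above fans out five concurrent completions; a self-contained sketch of the full pattern (with `mock_response` added as an assumption so it runs offline) looks like:

import asyncio
import litellm

async def make_async_calls():
    tasks = [
        asyncio.create_task(
            litellm.acompletion(
                model="azure/chatgpt-v-3",
                messages=[{"role": "user", "content": "This is a test"}],
                max_tokens=5,
                temperature=0.7,
                mock_response="ok",  # assumption: mocked to avoid real API calls
            )
        )
        for _ in range(5)
    ]
    return await asyncio.gather(*tasks)  # run the five calls concurrently

responses = asyncio.run(make_async_calls())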

View file

@@ -40,7 +40,7 @@ def test_spend_logs_payload(model_id: Optional[str]):
input_args: dict = {
"kwargs": {
"model": "chatgpt-v-2",
"model": "chatgpt-v-3",
"messages": [
{"role": "system", "content": "you are a helpful assistant.\n"},
{"role": "user", "content": "bom dia"},
@@ -89,7 +89,7 @@ def test_spend_logs_payload(model_id: Optional[str]):
},
"endpoint": "http://localhost:4000/chat/completions",
"model_group": "gpt-3.5-turbo",
"deployment": "azure/chatgpt-v-2",
"deployment": "azure/chatgpt-v-3",
"model_info": {
"id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
"db_model": False,
@@ -99,7 +99,7 @@ def test_spend_logs_payload(model_id: Optional[str]):
"error_information": None,
"status": "success",
"proxy_server_request": "{}",
"raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
"raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
},
"model_info": {
"id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
@@ -158,7 +158,7 @@ def test_spend_logs_payload(model_id: Optional[str]):
"api_base": "openai-gpt-4-test-v-1.openai.azure.com",
"acompletion": True,
"complete_input_dict": {
"model": "chatgpt-v-2",
"model": "chatgpt-v-3",
"messages": [
{"role": "system", "content": "you are a helpful assistant.\n"},
{"role": "user", "content": "bom dia"},

View file

@@ -25,7 +25,7 @@ config = {
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.environ["AZURE_API_KEY"],
"api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
"api_version": "2023-07-01-preview",

View file

@@ -9,7 +9,7 @@
# chat = ChatOpenAI(
# openai_api_base="http://0.0.0.0:8000",
# model = "azure/chatgpt-v-2",
# model = "azure/chatgpt-v-3",
# temperature=0.1,
# extra_body={
# "metadata": {

View file

@@ -39,7 +39,7 @@ client = openai.AzureOpenAI(
)
try:
response = client.chat.completions.create(
model="chatgpt-v-2",
model="chatgpt-v-3",
messages=[
{
"role": "user",

View file

@@ -4,7 +4,7 @@ client = openai.OpenAI(api_key="hi", base_url="http://0.0.0.0:8000")
# # request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[
{"role": "user", "content": "this is a test request, write a short poem"}
],

View file

@@ -1,7 +1,7 @@
model_list:
- model_name: gpt-4-team1
litellm_params:
model: azure/chatgpt-v-2
model: azure/chatgpt-v-3
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
api_version: "2023-05-15"
api_key: os.environ/AZURE_API_KEY

View file

@@ -5,12 +5,12 @@ model_list:
model: gpt-3.5-turbo
- model_name: working-azure-gpt-3.5-turbo
litellm_params:
model: azure/chatgpt-v-2
model: azure/chatgpt-v-3
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
- model_name: azure-gpt-3.5-turbo
litellm_params:
model: azure/chatgpt-v-2
model: azure/chatgpt-v-3
api_base: os.environ/AZURE_API_BASE
api_key: bad-key
- model_name: azure-embedding

View file

@@ -1,7 +1,7 @@
model_list:
- model_name: azure-cloudflare
litellm_params:
model: azure/chatgpt-v-2
model: azure/chatgpt-v-3
api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
api_key: os.environ/AZURE_API_KEY
api_version: 2023-07-01-preview

View file

@@ -12,7 +12,7 @@ model_list:
- litellm_params:
api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
api_key: os.environ/AZURE_API_KEY
model: azure/chatgpt-v-2
model: azure/chatgpt-v-3
model_name: azure-cloudflare-model
- litellm_params:
api_base: https://openai-france-1234.openai.azure.com

View file

@@ -1,7 +1,7 @@
model_list:
- model_name: Azure OpenAI GPT-4 Canada
litellm_params:
model: azure/chatgpt-v-2
model: azure/chatgpt-v-3
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
api_version: "2023-07-01-preview"

View file

@@ -1546,7 +1546,7 @@ def test_call_with_key_over_budget(prisma_client):
)
await proxy_db_logger._PROXY_track_cost_callback(
kwargs={
"model": "chatgpt-v-2",
"model": "chatgpt-v-3",
"stream": False,
"litellm_params": {
"metadata": {
@@ -1578,7 +1578,7 @@ def test_call_with_key_over_budget(prisma_client):
assert spend_log.request_id == request_id
assert spend_log.spend == float("2e-05")
assert spend_log.model == "chatgpt-v-2"
assert spend_log.model == "chatgpt-v-3"
assert (
spend_log.cache_key
== "c891d64397a472e6deb31b87a5ac4d3ed5b2dcc069bc87e2afe91e6d64e95a1e"
@@ -1669,7 +1669,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client):
proxy_db_logger = _ProxyDBLogger()
await proxy_db_logger._PROXY_track_cost_callback(
kwargs={
"model": "chatgpt-v-2",
"model": "chatgpt-v-3",
"stream": False,
"litellm_params": {
"metadata": {
@@ -1702,7 +1702,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client):
assert spend_log.request_id == request_id
assert spend_log.spend == float("2e-05")
assert spend_log.model == "chatgpt-v-2"
assert spend_log.model == "chatgpt-v-3"
assert (
spend_log.cache_key
== "c891d64397a472e6deb31b87a5ac4d3ed5b2dcc069bc87e2afe91e6d64e95a1e"
@@ -1757,7 +1757,7 @@ async def test_call_with_key_over_model_budget(
try:
# set budget for chatgpt-v-2 to 0.000001, expect the next request to fail
# set budget for chatgpt-v-3 to 0.000001, expect the next request to fail
model_max_budget = {
"gpt-4o-mini": {
"budget_limit": "0.000001",
@@ -1898,7 +1898,7 @@ async def test_call_with_key_never_over_budget(prisma_client):
)
await proxy_db_logger._PROXY_track_cost_callback(
kwargs={
"model": "chatgpt-v-2",
"model": "chatgpt-v-3",
"stream": False,
"litellm_params": {
"metadata": {
@@ -1987,7 +1987,7 @@ async def test_call_with_key_over_budget_stream(prisma_client):
await proxy_db_logger._PROXY_track_cost_callback(
kwargs={
"call_type": "acompletion",
"model": "sagemaker-chatgpt-v-2",
"model": "sagemaker-chatgpt-v-3",
"stream": True,
"complete_streaming_response": resp,
"litellm_params": {
@@ -2431,7 +2431,7 @@ async def track_cost_callback_helper_fn(generated_key: str, user_id: str):
await proxy_db_logger._PROXY_track_cost_callback(
kwargs={
"call_type": "acompletion",
"model": "sagemaker-chatgpt-v-2",
"model": "sagemaker-chatgpt-v-3",
"stream": True,
"complete_streaming_response": resp,
"litellm_params": {

View file

@@ -164,7 +164,7 @@ def test_chat_completion(client):
my_custom_logger.async_success == True
) # checks if the status of async_success is True, only the async_log_success_event can set this to true
assert (
my_custom_logger.async_completion_kwargs["model"] == "chatgpt-v-2"
my_custom_logger.async_completion_kwargs["model"] == "chatgpt-v-3"
) # checks if kwargs passed to async_log_success_event are correct
print(
"\n\n Custom Logger Async Completion args",

View file

@@ -64,7 +64,7 @@ def test_chat_completion(client_no_auth):
ModelConfig(
model_name="user-azure-instance",
litellm_params=CompletionRequest(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
api_key=os.getenv("AZURE_API_KEY"),
api_version=os.getenv("AZURE_API_VERSION"),
api_base=os.getenv("AZURE_API_BASE"),

View file

@@ -446,7 +446,7 @@ def test_chat_completion_azure(mock_acompletion, client_no_auth):
try:
# Your test data
test_data = {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"messages": [
{"role": "user", "content": "write 1 sentence poem"},
],
@@ -457,7 +457,7 @@ def test_chat_completion_azure(mock_acompletion, client_no_auth):
response = client_no_auth.post("/v1/chat/completions", json=test_data)
mock_acompletion.assert_called_once_with(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[
{"role": "user", "content": "write 1 sentence poem"},
],
@@ -489,19 +489,19 @@ def test_openai_deployments_model_chat_completions_azure(
try:
# Your test data
test_data = {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"messages": [
{"role": "user", "content": "write 1 sentence poem"},
],
"max_tokens": 10,
}
url = "/openai/deployments/azure/chatgpt-v-2/chat/completions"
url = "/openai/deployments/azure/chatgpt-v-3/chat/completions"
print(f"testing proxy server with Azure Request {url}")
response = client_no_auth.post(url, json=test_data)
mock_acompletion.assert_called_once_with(
model="azure/chatgpt-v-2",
model="azure/chatgpt-v-3",
messages=[
{"role": "user", "content": "write 1 sentence poem"},
],
@@ -1314,7 +1314,7 @@ async def test_add_callback_via_key(prisma_client):
try:
# Your test data
test_data = {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"messages": [
{"role": "user", "content": "write 1 sentence poem"},
],
@@ -1408,7 +1408,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(
request._url = URL(url="/chat/completions")
test_data = {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"messages": [
{"role": "user", "content": "write 1 sentence poem"},
],
@@ -1423,7 +1423,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(
data = {
"data": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"messages": [{"role": "user", "content": "write 1 sentence poem"}],
"max_tokens": 10,
"mock_response": "Hello world",
@@ -1523,7 +1523,7 @@ async def test_disable_fallbacks_by_key(disable_fallbacks_set):
key_metadata = {"disable_fallbacks": disable_fallbacks_set}
existing_data = {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"messages": [{"role": "user", "content": "write 1 sentence poem"}],
}
data = LiteLLMProxyRequestSetup.add_key_level_controls(
@@ -1564,7 +1564,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_gcs_bucket(
request._url = URL(url="/chat/completions")
test_data = {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"messages": [
{"role": "user", "content": "write 1 sentence poem"},
],
@@ -1579,7 +1579,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_gcs_bucket(
data = {
"data": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"messages": [{"role": "user", "content": "write 1 sentence poem"}],
"max_tokens": 10,
"mock_response": "Hello world",
@@ -1697,7 +1697,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_langsmith(
request._url = URL(url="/chat/completions")
test_data = {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"messages": [
{"role": "user", "content": "write 1 sentence poem"},
],
@@ -1712,7 +1712,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_langsmith(
data = {
"data": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"messages": [{"role": "user", "content": "write 1 sentence poem"}],
"max_tokens": 10,
"mock_response": "Hello world",

View file

@@ -171,7 +171,7 @@
# model_data = {
# "model_name": "azure-model",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "model": "azure/chatgpt-v-3",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_base": os.getenv("AZURE_API_BASE"),
# "api_version": os.getenv("AZURE_API_VERSION")

View file

@@ -67,7 +67,7 @@ async def add_models(session, model_id="123", model_name="azure-gpt-3.5", key="s
data = {
"model_name": model_name,
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "os.environ/AZURE_API_KEY",
"api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
"api_version": "2023-05-15",
@@ -100,7 +100,7 @@ async def update_model(session, model_id="123", model_name="azure-gpt-3.5", key=
data = {
"model_name": model_name,
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": "os.environ/AZURE_API_KEY",
"api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
"api_version": "2023-05-15",
@@ -292,7 +292,7 @@ async def add_model_for_health_checking(session, model_id="123"):
data = {
"model_name": f"azure-model-health-check-{model_id}",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"model": "azure/chatgpt-v-3",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
"api_version": "2023-05-15",
@@ -417,7 +417,7 @@ async def test_add_model_run_health():
assert _health_info["healthy_count"] == 1
assert (
_healthy_endpooint["model"] == "azure/chatgpt-v-2"
_healthy_endpooint["model"] == "azure/chatgpt-v-3"
) # this is the model that got added
# assert httpx client is unchanged
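
A hedged sketch of the health-check call this test ends with, assuming a proxy running locally; the response fields mirror the `healthy_count` / `unhealthy_count` keys asserted above:

import asyncio
import aiohttp

async def run_health_check(key="sk-1234"):
    headers = {"Authorization": f"Bearer {key}"}
    async with aiohttp.ClientSession() as session:
        async with session.get("http://0.0.0.0:4000/health", headers=headers) as resp:
            return await resp.json()

health_info = asyncio.run(run_health_check())
print(health_info["healthy_count"], health_info["unhealthy_count"])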