From f7dd68803564779318f320fbfdfe4e5345ab89eb Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 18 Apr 2025 08:42:12 -0700 Subject: [PATCH 1/5] test: handle cohere rbac issue (verified happens on calling azure directly) --- tests/local_testing/test_completion.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py index d56475e1a0..041a49cb99 100644 --- a/tests/local_testing/test_completion.py +++ b/tests/local_testing/test_completion.py @@ -131,15 +131,15 @@ def test_null_role_response(): assert response.choices[0].message.role == "assistant" - +@pytest.mark.skip(reason="Cohere having RBAC issues") def test_completion_azure_command_r(): try: - litellm.set_verbose = True + litellm._turn_on_debug() response = completion( model="azure/command-r-plus", - api_base=os.getenv("AZURE_COHERE_API_BASE"), - api_key=os.getenv("AZURE_COHERE_API_KEY"), + api_base="https://Cohere-command-r-plus-gylpd-serverless.eastus2.inference.ai.azure.com", + api_key="AO89xyvmOLLMgoMI7WaiEaP0t6M09itr", messages=[{"role": "user", "content": "What is the meaning of life?"}], ) From de3c2d14bff66048c313d5768d46e3577982176a Mon Sep 17 00:00:00 2001 From: David Emmanuel Date: Fri, 18 Apr 2025 17:44:46 +0100 Subject: [PATCH 2/5] Add Gemini Flash 2.5 Preview Model Price and Context Window (#10125) * Update model_prices_and_context_window_backup.json * Update model_prices_and_context_window.json --- ...odel_prices_and_context_window_backup.json | 27 +++++++++++++++++++ model_prices_and_context_window.json | 27 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 8a10b97b1e..4b0d657e05 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -5168,6 +5168,33 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, + "gemini/gemini-2.5-flash-preview-04-17": { + "max_tokens": 65536, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_audio_token": 0.0000001, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000060, + "litellm_provider": "gemini", + "mode": "chat", + "rpm": 10, + "tpm": 250000, + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_audio_output": false, + "supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview" + }, "gemini-2.0-flash": { "max_tokens": 8192, "max_input_tokens": 1048576, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 8a10b97b1e..4b0d657e05 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -5168,6 +5168,33 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, + "gemini/gemini-2.5-flash-preview-04-17": { + "max_tokens": 65536, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_audio_token": 0.0000001, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000060, + "litellm_provider": "gemini", + "mode": "chat", + "rpm": 10, + "tpm": 250000, + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_audio_output": false, + "supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview" + }, "gemini-2.0-flash": { "max_tokens": 8192, "max_input_tokens": 1048576, From 415abfc222a01bac7e67e3c547ef64421022c23d Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 18 Apr 2025 13:13:58 -0700 Subject: [PATCH 3/5] test: update test --- tests/local_testing/test_completion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py index 041a49cb99..2294ca4f0f 100644 --- a/tests/local_testing/test_completion.py +++ b/tests/local_testing/test_completion.py @@ -1332,7 +1332,7 @@ def test_completion_fireworks_ai(): }, ] response = completion( - model="fireworks_ai/mixtral-8x7b-instruct", + model="fireworks_ai/llama4-maverick-instruct-basic", messages=messages, ) print(response) From 1ea046cc617f277e6d7d526645b0392a5f1ad149 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Fri, 18 Apr 2025 14:22:12 -0700 Subject: [PATCH 4/5] test: update tests to new deployment model (#10142) * test: update tests to new deployment model * test: update model name * test: skip cohere rbac issue test * test: update test - replace gpt-4o model --- tests/llm_translation/test_azure_ai.py | 23 ++++--- tests/llm_translation/test_azure_openai.py | 8 +-- tests/llm_translation/test_optional_params.py | 4 +- tests/llm_translation/test_rerank.py | 1 + tests/load_tests/test_datadog_load_test.py | 2 +- tests/load_tests/test_otel_load_test.py | 2 +- .../example_config_yaml/azure_config.yaml | 2 +- tests/local_testing/test_acooldowns_router.py | 10 +-- tests/local_testing/test_alangfuse.py | 2 +- tests/local_testing/test_assistants.py | 2 +- tests/local_testing/test_azure_openai.py | 4 +- tests/local_testing/test_azure_perf.py | 4 +- tests/local_testing/test_caching.py | 14 ++-- tests/local_testing/test_caching_ssl.py | 2 +- tests/local_testing/test_class.py | 4 +- tests/local_testing/test_completion.py | 34 +++++----- tests/local_testing/test_config.py | 6 +- .../test_configs/test_bad_config.yaml | 4 +- ...st_cloudflare_azure_with_cache_config.yaml | 2 +- .../test_configs/test_config_no_auth.yaml | 2 +- .../test_configs/test_custom_logger.yaml | 2 +- .../test_custom_callback_input.py | 22 +++--- .../test_custom_callback_router.py | 10 +-- tests/local_testing/test_custom_logger.py | 6 +- tests/local_testing/test_exceptions.py | 22 +++--- tests/local_testing/test_gcs_bucket.py | 12 ++-- tests/local_testing/test_health_check.py | 2 +- .../test_helicone_integration.py | 2 +- .../local_testing/test_least_busy_routing.py | 6 +- .../local_testing/test_load_test_router_s3.py | 2 +- tests/local_testing/test_loadtest_router.py | 2 +- .../local_testing/test_lowest_cost_routing.py | 2 +- .../test_lowest_latency_routing.py | 28 ++++---- tests/local_testing/test_mem_usage.py | 4 +- tests/local_testing/test_mock_request.py | 2 +- .../local_testing/test_prometheus_service.py | 4 +- .../test_prompt_injection_detection.py | 2 +- .../test_provider_specific_config.py | 4 +- tests/local_testing/test_router.py | 28 ++++---- .../test_router_budget_limiter.py | 4 +- tests/local_testing/test_router_caching.py | 6 +- .../local_testing/test_router_client_init.py | 2 +- tests/local_testing/test_router_cooldowns.py | 2 +- tests/local_testing/test_router_debug_logs.py | 4 +- tests/local_testing/test_router_fallbacks.py | 38 +++++------ .../test_router_get_deployments.py | 68 +++++++++---------- tests/local_testing/test_router_init.py | 14 ++-- .../test_router_policy_violation.py | 2 +- tests/local_testing/test_router_retries.py | 32 ++++----- tests/local_testing/test_router_timeout.py | 2 +- tests/local_testing/test_router_utils.py | 6 +- tests/local_testing/test_streaming.py | 6 +- tests/local_testing/test_timeout.py | 6 +- .../local_testing/test_tpm_rpm_routing_v2.py | 12 ++-- tests/logging_callback_tests/test_alerting.py | 2 +- .../test_amazing_s3_logs.py | 2 +- .../logging_callback_tests/test_spend_logs.py | 8 +-- tests/old_proxy_tests/tests/load_test_q.py | 2 +- .../tests/test_langchain_request.py | 2 +- .../tests/test_openai_exception_request.py | 2 +- .../tests/test_openai_request.py | 2 +- .../example_config_yaml/azure_config.yaml | 2 +- .../test_configs/test_bad_config.yaml | 4 +- ...st_cloudflare_azure_with_cache_config.yaml | 2 +- .../test_configs/test_config_no_auth.yaml | 2 +- .../test_configs/test_custom_logger.yaml | 2 +- .../test_key_generate_prisma.py | 16 ++--- .../test_proxy_custom_logger.py | 2 +- .../test_proxy_pass_user_config.py | 2 +- tests/proxy_unit_tests/test_proxy_server.py | 26 +++---- .../test_proxy_server_keys.py | 2 +- tests/test_models.py | 8 +-- 72 files changed, 294 insertions(+), 292 deletions(-) diff --git a/tests/llm_translation/test_azure_ai.py b/tests/llm_translation/test_azure_ai.py index 6ec2050638..62f68b02d3 100644 --- a/tests/llm_translation/test_azure_ai.py +++ b/tests/llm_translation/test_azure_ai.py @@ -14,7 +14,7 @@ from litellm.llms.anthropic.chat import ModelResponseIterator import httpx import json from litellm.llms.custom_httpx.http_handler import HTTPHandler -from base_rerank_unit_tests import BaseLLMRerankTest +# from base_rerank_unit_tests import BaseLLMRerankTest load_dotenv() import io @@ -255,16 +255,17 @@ def test_azure_deepseek_reasoning_content(): assert response.choices[0].message.content == "\n\nThe sky is a canvas of blue" -class TestAzureAIRerank(BaseLLMRerankTest): - def get_custom_llm_provider(self) -> litellm.LlmProviders: - return litellm.LlmProviders.AZURE_AI +# skipping due to cohere rbac issues +# class TestAzureAIRerank(BaseLLMRerankTest): +# def get_custom_llm_provider(self) -> litellm.LlmProviders: +# return litellm.LlmProviders.AZURE_AI - def get_base_rerank_call_args(self) -> dict: - return { - "model": "azure_ai/cohere-rerank-v3-english", - "api_base": os.getenv("AZURE_AI_COHERE_API_BASE"), - "api_key": os.getenv("AZURE_AI_COHERE_API_KEY"), - } +# def get_base_rerank_call_args(self) -> dict: +# return { +# "model": "azure_ai/cohere-rerank-v3-english", +# "api_base": os.getenv("AZURE_AI_COHERE_API_BASE"), +# "api_key": os.getenv("AZURE_AI_COHERE_API_KEY"), +# } @pytest.mark.asyncio @@ -279,7 +280,7 @@ async def test_azure_ai_request_format(): # Set up the test parameters api_key = os.getenv("AZURE_API_KEY") - api_base = f"{os.getenv('AZURE_API_BASE')}/openai/deployments/gpt-4o/chat/completions?api-version=2024-08-01-preview" + api_base = f"{os.getenv('AZURE_API_BASE')}/openai/deployments/gpt-4o-new-test/chat/completions?api-version=2024-08-01-preview" model = "azure_ai/gpt-4o" messages = [ {"role": "user", "content": "hi"}, diff --git a/tests/llm_translation/test_azure_openai.py b/tests/llm_translation/test_azure_openai.py index d289c892a0..72ea3ec27e 100644 --- a/tests/llm_translation/test_azure_openai.py +++ b/tests/llm_translation/test_azure_openai.py @@ -137,7 +137,7 @@ def test_azure_extra_headers(input, call_type, header_value): func = image_generation data = { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com", "api_version": "2023-07-01-preview", "api_key": "my-azure-api-key", @@ -339,7 +339,7 @@ def test_azure_gpt_4o_with_tool_call_and_response_format(api_version): with patch.object(client.chat.completions.with_raw_response, "create") as mock_post: response = litellm.completion( - model="azure/gpt-4o", + model="azure/gpt-4o-new-test", messages=[ { "role": "system", @@ -474,7 +474,7 @@ def test_azure_max_retries_0( try: completion( - model="azure/gpt-4o", + model="azure/gpt-4o-new-test", messages=[{"role": "user", "content": "Hello world"}], max_retries=max_retries, stream=stream, @@ -502,7 +502,7 @@ async def test_async_azure_max_retries_0( try: await acompletion( - model="azure/gpt-4o", + model="azure/gpt-4o-new-test", messages=[{"role": "user", "content": "Hello world"}], max_retries=max_retries, stream=stream, diff --git a/tests/llm_translation/test_optional_params.py b/tests/llm_translation/test_optional_params.py index 4545ebbe20..e207e367e4 100644 --- a/tests/llm_translation/test_optional_params.py +++ b/tests/llm_translation/test_optional_params.py @@ -217,7 +217,7 @@ def test_openai_optional_params_embeddings(): def test_azure_optional_params_embeddings(): litellm.drop_params = True optional_params = get_optional_params_embeddings( - model="chatgpt-v-2", + model="chatgpt-v-3", user="John", encoding_format=None, custom_llm_provider="azure", @@ -396,7 +396,7 @@ def test_azure_tool_choice(api_version): """ litellm.drop_params = True optional_params = litellm.utils.get_optional_params( - model="chatgpt-v-2", + model="chatgpt-v-3", user="John", custom_llm_provider="azure", max_tokens=10, diff --git a/tests/llm_translation/test_rerank.py b/tests/llm_translation/test_rerank.py index 5de6c1a8ec..e9d9e38951 100644 --- a/tests/llm_translation/test_rerank.py +++ b/tests/llm_translation/test_rerank.py @@ -150,6 +150,7 @@ async def test_basic_rerank_together_ai(sync_mode): @pytest.mark.asyncio() @pytest.mark.parametrize("sync_mode", [True, False]) +@pytest.mark.skip(reason="Skipping test due to Cohere RBAC issues") async def test_basic_rerank_azure_ai(sync_mode): import os diff --git a/tests/load_tests/test_datadog_load_test.py b/tests/load_tests/test_datadog_load_test.py index b56c82288e..f4328b71b1 100644 --- a/tests/load_tests/test_datadog_load_test.py +++ b/tests/load_tests/test_datadog_load_test.py @@ -91,7 +91,7 @@ async def make_async_calls(metadata=None, **completion_kwargs): def create_async_task(**completion_kwargs): litellm.set_verbose = True completion_args = { - "model": "openai/chatgpt-v-2", + "model": "openai/chatgpt-v-3", "api_version": "2024-02-01", "messages": [{"role": "user", "content": "This is a test"}], "max_tokens": 5, diff --git a/tests/load_tests/test_otel_load_test.py b/tests/load_tests/test_otel_load_test.py index 50e5748686..f5754c0c40 100644 --- a/tests/load_tests/test_otel_load_test.py +++ b/tests/load_tests/test_otel_load_test.py @@ -86,7 +86,7 @@ def create_async_task(**completion_kwargs): By default a standard set of arguments are used for the litellm.acompletion function. """ completion_args = { - "model": "openai/chatgpt-v-2", + "model": "openai/chatgpt-v-3", "api_version": "2024-02-01", "messages": [{"role": "user", "content": "This is a test" * 100}], "max_tokens": 5, diff --git a/tests/local_testing/example_config_yaml/azure_config.yaml b/tests/local_testing/example_config_yaml/azure_config.yaml index fd5865cd7c..111813c884 100644 --- a/tests/local_testing/example_config_yaml/azure_config.yaml +++ b/tests/local_testing/example_config_yaml/azure_config.yaml @@ -1,7 +1,7 @@ model_list: - model_name: gpt-4-team1 litellm_params: - model: azure/chatgpt-v-2 + model: azure/chatgpt-v-3 api_base: https://openai-gpt-4-test-v-1.openai.azure.com/ api_version: "2023-05-15" api_key: os.environ/AZURE_API_KEY diff --git a/tests/local_testing/test_acooldowns_router.py b/tests/local_testing/test_acooldowns_router.py index df3f493a68..8427fd2be8 100644 --- a/tests/local_testing/test_acooldowns_router.py +++ b/tests/local_testing/test_acooldowns_router.py @@ -26,7 +26,7 @@ model_list = [ { # list of model deployments "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -143,7 +143,7 @@ async def test_cooldown_same_model_name(sync_mode): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -153,7 +153,7 @@ async def test_cooldown_same_model_name(sync_mode): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -184,7 +184,7 @@ async def test_cooldown_same_model_name(sync_mode): model_ids.append(model["model_info"]["id"]) print("\n litellm model ids ", model_ids) - # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960'] + # example litellm_model_names ['azure/chatgpt-v-3-ModelID-64321', 'azure/chatgpt-v-3-ModelID-63960'] assert ( model_ids[0] != model_ids[1] ) # ensure both models have a uuid added, and they have different names @@ -201,7 +201,7 @@ async def test_cooldown_same_model_name(sync_mode): model_ids.append(model["model_info"]["id"]) print("\n litellm model ids ", model_ids) - # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960'] + # example litellm_model_names ['azure/chatgpt-v-3-ModelID-64321', 'azure/chatgpt-v-3-ModelID-63960'] assert ( model_ids[0] != model_ids[1] ) # ensure both models have a uuid added, and they have different names diff --git a/tests/local_testing/test_alangfuse.py b/tests/local_testing/test_alangfuse.py index cdcf18f79f..2eea426478 100644 --- a/tests/local_testing/test_alangfuse.py +++ b/tests/local_testing/test_alangfuse.py @@ -194,7 +194,7 @@ def create_async_task(**completion_kwargs): By default a standard set of arguments are used for the litellm.acompletion function. """ completion_args = { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_version": "2024-02-01", "messages": [{"role": "user", "content": "This is a test"}], "max_tokens": 5, diff --git a/tests/local_testing/test_assistants.py b/tests/local_testing/test_assistants.py index 544523e4a0..d5755f2aba 100644 --- a/tests/local_testing/test_assistants.py +++ b/tests/local_testing/test_assistants.py @@ -71,7 +71,7 @@ async def test_create_delete_assistants(provider, sync_mode): model = "gpt-4-turbo" if provider == "azure": os.environ["AZURE_API_VERSION"] = "2024-05-01-preview" - model = "chatgpt-v-2" + model = "chatgpt-v-3" if sync_mode == True: assistant = litellm.create_assistants( diff --git a/tests/local_testing/test_azure_openai.py b/tests/local_testing/test_azure_openai.py index 4ca2a20011..8e2d2aa509 100644 --- a/tests/local_testing/test_azure_openai.py +++ b/tests/local_testing/test_azure_openai.py @@ -46,7 +46,7 @@ async def test_aaaaazure_tenant_id_auth(respx_mock: MockRouter): { "model_name": "gpt-3.5-turbo", "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_base": os.getenv("AZURE_API_BASE"), "tenant_id": os.getenv("AZURE_TENANT_ID"), "client_id": os.getenv("AZURE_CLIENT_ID"), @@ -95,6 +95,6 @@ async def test_aaaaazure_tenant_id_auth(respx_mock: MockRouter): assert json_body == { "messages": [{"role": "user", "content": "Hello world!"}], - "model": "chatgpt-v-2", + "model": "chatgpt-v-3", "stream": False, } diff --git a/tests/local_testing/test_azure_perf.py b/tests/local_testing/test_azure_perf.py index b7d7abd553..bc6d694b78 100644 --- a/tests/local_testing/test_azure_perf.py +++ b/tests/local_testing/test_azure_perf.py @@ -18,7 +18,7 @@ # { # "model_name": "azure-test", # "litellm_params": { -# "model": "azure/chatgpt-v-2", +# "model": "azure/chatgpt-v-3", # "api_key": os.getenv("AZURE_API_KEY"), # "api_base": os.getenv("AZURE_API_BASE"), # "api_version": os.getenv("AZURE_API_VERSION"), @@ -33,7 +33,7 @@ # try: # start_time = time.time() # response = await client.chat.completions.create( -# model="chatgpt-v-2", +# model="chatgpt-v-3", # messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}], # stream=True, # ) diff --git a/tests/local_testing/test_caching.py b/tests/local_testing/test_caching.py index 43dafd7293..df0b625d7d 100644 --- a/tests/local_testing/test_caching.py +++ b/tests/local_testing/test_caching.py @@ -324,7 +324,7 @@ def test_caching_with_models_v2(): litellm.set_verbose = True response1 = completion(model="gpt-3.5-turbo", messages=messages, caching=True) response2 = completion(model="gpt-3.5-turbo", messages=messages, caching=True) - response3 = completion(model="azure/chatgpt-v-2", messages=messages, caching=True) + response3 = completion(model="azure/chatgpt-v-3", messages=messages, caching=True) print(f"response1: {response1}") print(f"response2: {response2}") print(f"response3: {response3}") @@ -1170,7 +1170,7 @@ async def test_s3_cache_stream_azure(sync_mode): if sync_mode: response1 = litellm.completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=messages, max_tokens=40, temperature=1, @@ -1183,7 +1183,7 @@ async def test_s3_cache_stream_azure(sync_mode): print(response_1_content) else: response1 = await litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=messages, max_tokens=40, temperature=1, @@ -1203,7 +1203,7 @@ async def test_s3_cache_stream_azure(sync_mode): if sync_mode: response2 = litellm.completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=messages, max_tokens=40, temperature=1, @@ -1216,7 +1216,7 @@ async def test_s3_cache_stream_azure(sync_mode): print(response_2_content) else: response2 = await litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=messages, max_tokens=40, temperature=1, @@ -1279,7 +1279,7 @@ async def test_s3_cache_acompletion_azure(): print("s3 Cache: test for caching, streaming + completion") response1 = await litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=messages, max_tokens=40, temperature=1, @@ -1289,7 +1289,7 @@ async def test_s3_cache_acompletion_azure(): time.sleep(2) response2 = await litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=messages, max_tokens=40, temperature=1, diff --git a/tests/local_testing/test_caching_ssl.py b/tests/local_testing/test_caching_ssl.py index 1b642f7674..8194115ef1 100644 --- a/tests/local_testing/test_caching_ssl.py +++ b/tests/local_testing/test_caching_ssl.py @@ -58,7 +58,7 @@ def test_caching_router(): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), diff --git a/tests/local_testing/test_class.py b/tests/local_testing/test_class.py index a15f362372..e6b711efe8 100644 --- a/tests/local_testing/test_class.py +++ b/tests/local_testing/test_class.py @@ -55,7 +55,7 @@ # # { # # "model_name": "gpt-3.5-turbo", # openai model name # # "litellm_params": { # params for litellm completion/embedding call -# # "model": "azure/chatgpt-v-2", +# # "model": "azure/chatgpt-v-3", # # "api_key": os.getenv("AZURE_API_KEY"), # # "api_version": os.getenv("AZURE_API_VERSION"), # # "api_base": os.getenv("AZURE_API_BASE"), @@ -93,7 +93,7 @@ # # { # # "model_name": "gpt-3.5-turbo", # openai model name # # "litellm_params": { # params for litellm completion/embedding call -# # "model": "azure/chatgpt-v-2", +# # "model": "azure/chatgpt-v-3", # # "api_key": os.getenv("AZURE_API_KEY"), # # "api_version": os.getenv("AZURE_API_VERSION"), # # "api_base": os.getenv("AZURE_API_BASE"), diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py index 2294ca4f0f..9f573662f7 100644 --- a/tests/local_testing/test_completion.py +++ b/tests/local_testing/test_completion.py @@ -732,7 +732,7 @@ def encode_image(image_path): "model", [ "gpt-4o", - "azure/gpt-4o", + "azure/gpt-4o-new-test", "anthropic/claude-3-opus-20240229", ], ) # @@ -1824,9 +1824,9 @@ def test_completion_openai(): "model, api_version", [ # ("gpt-4o-2024-08-06", None), - # ("azure/chatgpt-v-2", None), + # ("azure/chatgpt-v-3", None), ("bedrock/anthropic.claude-3-sonnet-20240229-v1:0", None), - # ("azure/gpt-4o", "2024-08-01-preview"), + # ("azure/gpt-4o-new-test", "2024-08-01-preview"), ], ) @pytest.mark.flaky(retries=3, delay=1) @@ -2495,7 +2495,7 @@ def test_completion_azure_extra_headers(): litellm.client_session = http_client try: response = completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=messages, api_base=os.getenv("AZURE_API_BASE"), api_version="2023-07-01-preview", @@ -2544,7 +2544,7 @@ def test_completion_azure_ad_token(): litellm.client_session = http_client try: response = completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=messages, azure_ad_token="my-special-token", ) @@ -2575,7 +2575,7 @@ def test_completion_azure_key_completion_arg(): litellm.set_verbose = True ## Test azure call response = completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=messages, api_key=old_key, logprobs=True, @@ -2633,7 +2633,7 @@ async def test_re_use_azure_async_client(): ## Test azure call for _ in range(3): response = await litellm.acompletion( - model="azure/chatgpt-v-2", messages=messages, client=client + model="azure/chatgpt-v-3", messages=messages, client=client ) print(f"response: {response}") except Exception as e: @@ -2665,7 +2665,7 @@ def test_completion_azure(): litellm.set_verbose = False ## Test azure call response = completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=messages, api_key="os.environ/AZURE_API_KEY", ) @@ -2673,7 +2673,7 @@ def test_completion_azure(): print(f"response hidden params: {response._hidden_params}") ## Test azure flag for backwards-compat # response = completion( - # model="chatgpt-v-2", + # model="chatgpt-v-3", # messages=messages, # azure=True, # max_tokens=10 @@ -2712,7 +2712,7 @@ def test_azure_openai_ad_token(): litellm.input_callback = [tester] try: response = litellm.completion( - model="azure/chatgpt-v-2", # e.g. gpt-35-instant + model="azure/chatgpt-v-3", # e.g. gpt-35-instant messages=[ { "role": "user", @@ -2750,7 +2750,7 @@ def test_completion_azure2(): ## Test azure call response = completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=messages, api_base=api_base, api_key=api_key, @@ -2787,7 +2787,7 @@ def test_completion_azure3(): ## Test azure call response = completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=messages, max_tokens=10, ) @@ -2835,7 +2835,7 @@ def test_completion_azure_with_litellm_key(): openai.api_key = "ymca" response = completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=messages, ) # Add any assertions here to check the response @@ -2863,7 +2863,7 @@ def test_completion_azure_deployment_id(): try: litellm.set_verbose = True response = completion( - deployment_id="chatgpt-v-2", + deployment_id="chatgpt-v-3", model="gpt-3.5-turbo", messages=messages, ) @@ -3925,7 +3925,7 @@ def test_completion_stream_watsonx(): @pytest.mark.parametrize( "provider, model, project, region_name, token", [ - ("azure", "chatgpt-v-2", None, None, "test-token"), + ("azure", "chatgpt-v-3", None, None, "test-token"), ("vertex_ai", "anthropic-claude-3", "adroit-crow-1", "us-east1", None), ("watsonx", "ibm/granite", "96946574", "dallas", "1234"), ("bedrock", "anthropic.claude-3", None, "us-east-1", None), @@ -4178,7 +4178,7 @@ async def test_completion_ai21_chat(): @pytest.mark.parametrize( "model", - ["gpt-4o", "azure/chatgpt-v-2"], + ["gpt-4o", "azure/chatgpt-v-3"], ) @pytest.mark.parametrize( "stream", @@ -4200,7 +4200,7 @@ def test_completion_response_ratelimit_headers(model, stream): assert "x-ratelimit-remaining-requests" in additional_headers assert "x-ratelimit-remaining-tokens" in additional_headers - if model == "azure/chatgpt-v-2": + if model == "azure/chatgpt-v-3": # Azure OpenAI header assert "llm_provider-azureml-model-session" in additional_headers if model == "claude-3-sonnet-20240229": diff --git a/tests/local_testing/test_config.py b/tests/local_testing/test_config.py index ab8365b2d1..b56be32881 100644 --- a/tests/local_testing/test_config.py +++ b/tests/local_testing/test_config.py @@ -46,7 +46,7 @@ async def test_delete_deployment(): import base64 litellm_params = LiteLLM_Params( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", api_key=os.getenv("AZURE_API_KEY"), api_base=os.getenv("AZURE_API_BASE"), api_version=os.getenv("AZURE_API_VERSION"), @@ -232,7 +232,7 @@ async def test_db_error_new_model_check(): litellm_params = LiteLLM_Params( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", api_key=os.getenv("AZURE_API_KEY"), api_base=os.getenv("AZURE_API_BASE"), api_version=os.getenv("AZURE_API_VERSION"), @@ -250,7 +250,7 @@ def _create_model_list(flag_value: Literal[0, 1], master_key: str): import base64 new_litellm_params = LiteLLM_Params( - model="azure/chatgpt-v-2-3", + model="azure/chatgpt-v-3-3", api_key=os.getenv("AZURE_API_KEY"), api_base=os.getenv("AZURE_API_BASE"), api_version=os.getenv("AZURE_API_VERSION"), diff --git a/tests/local_testing/test_configs/test_bad_config.yaml b/tests/local_testing/test_configs/test_bad_config.yaml index 7c802a8408..0a16ecb3c5 100644 --- a/tests/local_testing/test_configs/test_bad_config.yaml +++ b/tests/local_testing/test_configs/test_bad_config.yaml @@ -5,12 +5,12 @@ model_list: model: gpt-3.5-turbo - model_name: working-azure-gpt-3.5-turbo litellm_params: - model: azure/chatgpt-v-2 + model: azure/chatgpt-v-3 api_base: os.environ/AZURE_API_BASE api_key: os.environ/AZURE_API_KEY - model_name: azure-gpt-3.5-turbo litellm_params: - model: azure/chatgpt-v-2 + model: azure/chatgpt-v-3 api_base: os.environ/AZURE_API_BASE api_key: bad-key - model_name: azure-embedding diff --git a/tests/local_testing/test_configs/test_cloudflare_azure_with_cache_config.yaml b/tests/local_testing/test_configs/test_cloudflare_azure_with_cache_config.yaml index c3c3cb1c32..aeadbeb872 100644 --- a/tests/local_testing/test_configs/test_cloudflare_azure_with_cache_config.yaml +++ b/tests/local_testing/test_configs/test_cloudflare_azure_with_cache_config.yaml @@ -1,7 +1,7 @@ model_list: - model_name: azure-cloudflare litellm_params: - model: azure/chatgpt-v-2 + model: azure/chatgpt-v-3 api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1 api_key: os.environ/AZURE_API_KEY api_version: 2023-07-01-preview diff --git a/tests/local_testing/test_configs/test_config_no_auth.yaml b/tests/local_testing/test_configs/test_config_no_auth.yaml index 1c5ddf2266..075bf7a09d 100644 --- a/tests/local_testing/test_configs/test_config_no_auth.yaml +++ b/tests/local_testing/test_configs/test_config_no_auth.yaml @@ -12,7 +12,7 @@ model_list: - litellm_params: api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1 api_key: os.environ/AZURE_API_KEY - model: azure/chatgpt-v-2 + model: azure/chatgpt-v-3 model_name: azure-cloudflare-model - litellm_params: api_base: https://openai-france-1234.openai.azure.com diff --git a/tests/local_testing/test_configs/test_custom_logger.yaml b/tests/local_testing/test_configs/test_custom_logger.yaml index 145c618edd..2ad500b36f 100644 --- a/tests/local_testing/test_configs/test_custom_logger.yaml +++ b/tests/local_testing/test_configs/test_custom_logger.yaml @@ -1,7 +1,7 @@ model_list: - model_name: Azure OpenAI GPT-4 Canada litellm_params: - model: azure/chatgpt-v-2 + model: azure/chatgpt-v-3 api_base: os.environ/AZURE_API_BASE api_key: os.environ/AZURE_API_KEY api_version: "2023-07-01-preview" diff --git a/tests/local_testing/test_custom_callback_input.py b/tests/local_testing/test_custom_callback_input.py index 222572935b..055ed821d0 100644 --- a/tests/local_testing/test_custom_callback_input.py +++ b/tests/local_testing/test_custom_callback_input.py @@ -450,12 +450,12 @@ def test_chat_azure_stream(): customHandler = CompletionCustomHandler() litellm.callbacks = [customHandler] response = litellm.completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}], ) # test streaming response = litellm.completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}], stream=True, ) @@ -464,7 +464,7 @@ def test_chat_azure_stream(): # test failure callback try: response = litellm.completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}], api_key="my-bad-key", stream=True, @@ -491,12 +491,12 @@ async def test_async_chat_azure_stream(): customHandler = CompletionCustomHandler() litellm.callbacks = [customHandler] response = await litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[{"role": "user", "content": "Hi 👋 - i'm async azure"}], ) ## test streaming response = await litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[{"role": "user", "content": "Hi 👋 - i'm async azure"}], stream=True, ) @@ -507,7 +507,7 @@ async def test_async_chat_azure_stream(): # test failure callback try: response = await litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[{"role": "user", "content": "Hi 👋 - i'm async azure"}], api_key="my-bad-key", stream=True, @@ -1018,7 +1018,7 @@ async def test_async_completion_azure_caching(): litellm.callbacks = [customHandler_caching] unique_time = time.time() response1 = await litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[ {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"} ], @@ -1027,7 +1027,7 @@ async def test_async_completion_azure_caching(): await asyncio.sleep(1) print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}") response2 = await litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[ {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"} ], @@ -1056,7 +1056,7 @@ async def test_async_completion_azure_caching_streaming(): litellm.callbacks = [customHandler_caching] unique_time = uuid.uuid4() response1 = await litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[ {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"} ], @@ -1069,7 +1069,7 @@ async def test_async_completion_azure_caching_streaming(): initial_customhandler_caching_states = len(customHandler_caching.states) print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}") response2 = await litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[ {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"} ], @@ -1207,7 +1207,7 @@ def test_turn_off_message_logging(): "model", [ "ft:gpt-3.5-turbo:my-org:custom_suffix:id" - ], # "gpt-3.5-turbo", "azure/chatgpt-v-2", + ], # "gpt-3.5-turbo", "azure/chatgpt-v-3", ) @pytest.mark.parametrize( "turn_off_message_logging", diff --git a/tests/local_testing/test_custom_callback_router.py b/tests/local_testing/test_custom_callback_router.py index 310a497922..83289abf5f 100644 --- a/tests/local_testing/test_custom_callback_router.py +++ b/tests/local_testing/test_custom_callback_router.py @@ -284,7 +284,7 @@ class CompletionCustomHandler( ) if ( - kwargs["model"] == "chatgpt-v-2" + kwargs["model"] == "chatgpt-v-3" and base_model is not None and kwargs["stream"] != True ): @@ -394,7 +394,7 @@ async def test_async_chat_azure(): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -438,7 +438,7 @@ async def test_async_chat_azure(): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "my-bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -545,7 +545,7 @@ async def test_async_chat_azure_with_fallbacks(): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "my-bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -606,7 +606,7 @@ async def test_async_completion_azure_caching(): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), diff --git a/tests/local_testing/test_custom_logger.py b/tests/local_testing/test_custom_logger.py index d9eb50eb73..ba9973e11d 100644 --- a/tests/local_testing/test_custom_logger.py +++ b/tests/local_testing/test_custom_logger.py @@ -160,7 +160,7 @@ def test_completion_azure_stream_moderation_failure(): ] try: response = completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=messages, mock_response="Exception: content_filter_policy", stream=True, @@ -195,7 +195,7 @@ def test_async_custom_handler_stream(): async def test_1(): nonlocal complete_streaming_response response = await litellm.acompletion( - model="azure/chatgpt-v-2", messages=messages, stream=True + model="azure/chatgpt-v-3", messages=messages, stream=True ) async for chunk in response: complete_streaming_response += ( @@ -239,7 +239,7 @@ def test_azure_completion_stream(): complete_streaming_response = "" response = litellm.completion( - model="azure/chatgpt-v-2", messages=messages, stream=True + model="azure/chatgpt-v-3", messages=messages, stream=True ) for chunk in response: complete_streaming_response += chunk["choices"][0]["delta"]["content"] or "" diff --git a/tests/local_testing/test_exceptions.py b/tests/local_testing/test_exceptions.py index 229ea07c7a..be7710f58a 100644 --- a/tests/local_testing/test_exceptions.py +++ b/tests/local_testing/test_exceptions.py @@ -51,7 +51,7 @@ async def test_content_policy_exception_azure(): # this is ony a test - we needed some way to invoke the exception :( litellm.set_verbose = True response = await litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[{"role": "user", "content": "where do I buy lethal drugs from"}], mock_response="Exception: content_filter_policy", ) @@ -124,7 +124,7 @@ def test_context_window_with_fallbacks(model): ctx_window_fallback_dict = { "command-nightly": "claude-2.1", "gpt-3.5-turbo-instruct": "gpt-3.5-turbo-16k", - "azure/chatgpt-v-2": "gpt-3.5-turbo-16k", + "azure/chatgpt-v-3": "gpt-3.5-turbo-16k", } sample_text = "how does a court case get to the Supreme Court?" * 1000 messages = [{"content": sample_text, "role": "user"}] @@ -161,7 +161,7 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th os.environ["AWS_REGION_NAME"] = "bad-key" temporary_secret_key = os.environ["AWS_SECRET_ACCESS_KEY"] os.environ["AWS_SECRET_ACCESS_KEY"] = "bad-key" - elif model == "azure/chatgpt-v-2": + elif model == "azure/chatgpt-v-3": temporary_key = os.environ["AZURE_API_KEY"] os.environ["AZURE_API_KEY"] = "bad-key" elif model == "claude-3-5-haiku-20241022": @@ -262,7 +262,7 @@ def test_completion_azure_exception(): old_azure_key = os.environ["AZURE_API_KEY"] os.environ["AZURE_API_KEY"] = "good morning" response = completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[{"role": "user", "content": "hello"}], ) os.environ["AZURE_API_KEY"] = old_azure_key @@ -309,7 +309,7 @@ async def asynctest_completion_azure_exception(): old_azure_key = os.environ["AZURE_API_KEY"] os.environ["AZURE_API_KEY"] = "good morning" response = await litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[{"role": "user", "content": "hello"}], ) print(f"response: {response}") @@ -528,7 +528,7 @@ def test_content_policy_violation_error_streaming(): async def test_get_response(): try: response = await litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[{"role": "user", "content": "say 1"}], temperature=0, top_p=1, @@ -557,7 +557,7 @@ def test_content_policy_violation_error_streaming(): async def test_get_error(): try: response = await litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[ {"role": "user", "content": "where do i buy lethal drugs from"} ], @@ -754,7 +754,7 @@ def test_litellm_predibase_exception(): # return False # # Repeat each model 500 times # # extended_models = [model for model in models for _ in range(250)] -# extended_models = ["azure/chatgpt-v-2" for _ in range(250)] +# extended_models = ["azure/chatgpt-v-3" for _ in range(250)] # def worker(model): # return test_model_call(model) @@ -934,7 +934,7 @@ def _pre_call_utils_httpx( ("openai", "gpt-3.5-turbo", "chat_completion", False), ("openai", "gpt-3.5-turbo", "chat_completion", True), ("openai", "gpt-3.5-turbo-instruct", "completion", True), - ("azure", "azure/chatgpt-v-2", "chat_completion", True), + ("azure", "azure/chatgpt-v-3", "chat_completion", True), ("azure", "azure/text-embedding-ada-002", "embedding", True), ("azure", "azure_text/gpt-3.5-turbo-instruct", "completion", True), ], @@ -1158,7 +1158,7 @@ async def test_exception_with_headers_httpx( @pytest.mark.asyncio -@pytest.mark.parametrize("model", ["azure/chatgpt-v-2", "openai/gpt-3.5-turbo"]) +@pytest.mark.parametrize("model", ["azure/chatgpt-v-3", "openai/gpt-3.5-turbo"]) async def test_bad_request_error_contains_httpx_response(model): """ Test that the BadRequestError contains the httpx response @@ -1209,7 +1209,7 @@ def test_context_window_exceeded_error_from_litellm_proxy(): @pytest.mark.parametrize("sync_mode", [True, False]) @pytest.mark.parametrize("stream_mode", [True, False]) -@pytest.mark.parametrize("model", ["azure/gpt-4o"]) # "gpt-4o-mini", +@pytest.mark.parametrize("model", ["azure/gpt-4o-new-test"]) # "gpt-4o-mini", @pytest.mark.asyncio async def test_exception_bubbling_up(sync_mode, stream_mode, model): """ diff --git a/tests/local_testing/test_gcs_bucket.py b/tests/local_testing/test_gcs_bucket.py index b64475c227..0004fae7c4 100644 --- a/tests/local_testing/test_gcs_bucket.py +++ b/tests/local_testing/test_gcs_bucket.py @@ -108,14 +108,14 @@ async def test_aaabasic_gcs_logger(): }, "endpoint": "http://localhost:4000/chat/completions", "model_group": "gpt-3.5-turbo", - "deployment": "azure/chatgpt-v-2", + "deployment": "azure/chatgpt-v-3", "model_info": { "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4", "db_model": False, }, "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/", "caching_groups": None, - "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n", + "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n", }, ) @@ -216,14 +216,14 @@ async def test_basic_gcs_logger_failure(): }, "endpoint": "http://localhost:4000/chat/completions", "model_group": "gpt-3.5-turbo", - "deployment": "azure/chatgpt-v-2", + "deployment": "azure/chatgpt-v-3", "model_info": { "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4", "db_model": False, }, "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/", "caching_groups": None, - "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n", + "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n", }, ) except Exception: @@ -626,14 +626,14 @@ async def test_basic_gcs_logger_with_folder_in_bucket_name(): }, "endpoint": "http://localhost:4000/chat/completions", "model_group": "gpt-3.5-turbo", - "deployment": "azure/chatgpt-v-2", + "deployment": "azure/chatgpt-v-3", "model_info": { "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4", "db_model": False, }, "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/", "caching_groups": None, - "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n", + "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n", }, ) diff --git a/tests/local_testing/test_health_check.py b/tests/local_testing/test_health_check.py index 809cd1ccbd..bf326d884b 100644 --- a/tests/local_testing/test_health_check.py +++ b/tests/local_testing/test_health_check.py @@ -20,7 +20,7 @@ import litellm async def test_azure_health_check(): response = await litellm.ahealth_check( model_params={ - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "messages": [{"role": "user", "content": "Hey, how's it going?"}], "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), diff --git a/tests/local_testing/test_helicone_integration.py b/tests/local_testing/test_helicone_integration.py index 968a9aa5b1..3a6fa0309b 100644 --- a/tests/local_testing/test_helicone_integration.py +++ b/tests/local_testing/test_helicone_integration.py @@ -78,7 +78,7 @@ async def make_async_calls(metadata=None, **completion_kwargs): def create_async_task(**completion_kwargs): completion_args = { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_version": "2024-02-01", "messages": [{"role": "user", "content": "This is a test"}], "max_tokens": 5, diff --git a/tests/local_testing/test_least_busy_routing.py b/tests/local_testing/test_least_busy_routing.py index cf69f596d9..7e4393da0b 100644 --- a/tests/local_testing/test_least_busy_routing.py +++ b/tests/local_testing/test_least_busy_routing.py @@ -33,7 +33,7 @@ def test_model_added(): "litellm_params": { "metadata": { "model_group": "gpt-3.5-turbo", - "deployment": "azure/chatgpt-v-2", + "deployment": "azure/chatgpt-v-3", }, "model_info": {"id": "1234"}, } @@ -47,7 +47,7 @@ def test_get_available_deployments(): test_cache = DualCache() least_busy_logger = LeastBusyLoggingHandler(router_cache=test_cache, model_list=[]) model_group = "gpt-3.5-turbo" - deployment = "azure/chatgpt-v-2" + deployment = "azure/chatgpt-v-3" kwargs = { "litellm_params": { "metadata": { @@ -113,7 +113,7 @@ async def test_router_get_available_deployments(async_test): router.leastbusy_logger.test_flag = True model_group = "azure-model" - deployment = "azure/chatgpt-v-2" + deployment = "azure/chatgpt-v-3" request_count_dict = {1: 10, 2: 54, 3: 100} cache_key = f"{model_group}_request_count" if async_test is True: diff --git a/tests/local_testing/test_load_test_router_s3.py b/tests/local_testing/test_load_test_router_s3.py index 3a022ae991..3a2567b686 100644 --- a/tests/local_testing/test_load_test_router_s3.py +++ b/tests/local_testing/test_load_test_router_s3.py @@ -46,7 +46,7 @@ # { # "model_name": "gpt-3.5-turbo", # "litellm_params": { -# "model": "azure/chatgpt-v-2", +# "model": "azure/chatgpt-v-3", # "api_key": os.getenv("AZURE_API_KEY"), # "api_base": os.getenv("AZURE_API_BASE"), # "api_version": os.getenv("AZURE_API_VERSION"), diff --git a/tests/local_testing/test_loadtest_router.py b/tests/local_testing/test_loadtest_router.py index a12a45b514..0d8a09ca62 100644 --- a/tests/local_testing/test_loadtest_router.py +++ b/tests/local_testing/test_loadtest_router.py @@ -38,7 +38,7 @@ # { # "model_name": "gpt-3.5-turbo", # "litellm_params": { -# "model": "azure/chatgpt-v-2", +# "model": "azure/chatgpt-v-3", # "api_key": os.getenv("AZURE_API_KEY"), # "api_base": os.getenv("AZURE_API_BASE"), # "api_version": os.getenv("AZURE_API_VERSION"), diff --git a/tests/local_testing/test_lowest_cost_routing.py b/tests/local_testing/test_lowest_cost_routing.py index 4e3105b5ff..caca007052 100644 --- a/tests/local_testing/test_lowest_cost_routing.py +++ b/tests/local_testing/test_lowest_cost_routing.py @@ -60,7 +60,7 @@ async def test_get_available_deployments_custom_price(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00003, }, diff --git a/tests/local_testing/test_lowest_latency_routing.py b/tests/local_testing/test_lowest_latency_routing.py index 4234490982..74dae25c1f 100644 --- a/tests/local_testing/test_lowest_latency_routing.py +++ b/tests/local_testing/test_lowest_latency_routing.py @@ -48,7 +48,7 @@ async def test_latency_memory_leak(sync_mode): "litellm_params": { "metadata": { "model_group": "gpt-3.5-turbo", - "deployment": "azure/chatgpt-v-2", + "deployment": "azure/chatgpt-v-3", }, "model_info": {"id": deployment_id}, } @@ -130,7 +130,7 @@ def test_latency_updated(): "litellm_params": { "metadata": { "model_group": "gpt-3.5-turbo", - "deployment": "azure/chatgpt-v-2", + "deployment": "azure/chatgpt-v-3", }, "model_info": {"id": deployment_id}, } @@ -173,7 +173,7 @@ def test_latency_updated_custom_ttl(): "litellm_params": { "metadata": { "model_group": "gpt-3.5-turbo", - "deployment": "azure/chatgpt-v-2", + "deployment": "azure/chatgpt-v-3", }, "model_info": {"id": deployment_id}, } @@ -200,12 +200,12 @@ def test_get_available_deployments(): model_list = [ { "model_name": "gpt-3.5-turbo", - "litellm_params": {"model": "azure/chatgpt-v-2"}, + "litellm_params": {"model": "azure/chatgpt-v-3"}, "model_info": {"id": "1234"}, }, { "model_name": "gpt-3.5-turbo", - "litellm_params": {"model": "azure/chatgpt-v-2"}, + "litellm_params": {"model": "azure/chatgpt-v-3"}, "model_info": {"id": "5678"}, }, ] @@ -219,7 +219,7 @@ def test_get_available_deployments(): "litellm_params": { "metadata": { "model_group": "gpt-3.5-turbo", - "deployment": "azure/chatgpt-v-2", + "deployment": "azure/chatgpt-v-3", }, "model_info": {"id": deployment_id}, } @@ -240,7 +240,7 @@ def test_get_available_deployments(): "litellm_params": { "metadata": { "model_group": "gpt-3.5-turbo", - "deployment": "azure/chatgpt-v-2", + "deployment": "azure/chatgpt-v-3", }, "model_info": {"id": deployment_id}, } @@ -275,7 +275,7 @@ async def _deploy(lowest_latency_logger, deployment_id, tokens_used, duration): "litellm_params": { "metadata": { "model_group": "gpt-3.5-turbo", - "deployment": "azure/chatgpt-v-2", + "deployment": "azure/chatgpt-v-3", }, "model_info": {"id": deployment_id}, } @@ -317,12 +317,12 @@ def test_get_available_endpoints_tpm_rpm_check_async(ans_rpm): model_list = [ { "model_name": "gpt-3.5-turbo", - "litellm_params": {"model": "azure/chatgpt-v-2"}, + "litellm_params": {"model": "azure/chatgpt-v-3"}, "model_info": {"id": "1234", "rpm": ans_rpm}, }, { "model_name": "gpt-3.5-turbo", - "litellm_params": {"model": "azure/chatgpt-v-2"}, + "litellm_params": {"model": "azure/chatgpt-v-3"}, "model_info": {"id": "5678", "rpm": non_ans_rpm}, }, ] @@ -366,12 +366,12 @@ def test_get_available_endpoints_tpm_rpm_check(ans_rpm): model_list = [ { "model_name": "gpt-3.5-turbo", - "litellm_params": {"model": "azure/chatgpt-v-2"}, + "litellm_params": {"model": "azure/chatgpt-v-3"}, "model_info": {"id": "1234", "rpm": ans_rpm}, }, { "model_name": "gpt-3.5-turbo", - "litellm_params": {"model": "azure/chatgpt-v-2"}, + "litellm_params": {"model": "azure/chatgpt-v-3"}, "model_info": {"id": "5678", "rpm": non_ans_rpm}, }, ] @@ -385,7 +385,7 @@ def test_get_available_endpoints_tpm_rpm_check(ans_rpm): "litellm_params": { "metadata": { "model_group": "gpt-3.5-turbo", - "deployment": "azure/chatgpt-v-2", + "deployment": "azure/chatgpt-v-3", }, "model_info": {"id": deployment_id}, } @@ -407,7 +407,7 @@ def test_get_available_endpoints_tpm_rpm_check(ans_rpm): "litellm_params": { "metadata": { "model_group": "gpt-3.5-turbo", - "deployment": "azure/chatgpt-v-2", + "deployment": "azure/chatgpt-v-3", }, "model_info": {"id": deployment_id}, } diff --git a/tests/local_testing/test_mem_usage.py b/tests/local_testing/test_mem_usage.py index 4a804b4033..9f18fb1e2d 100644 --- a/tests/local_testing/test_mem_usage.py +++ b/tests/local_testing/test_mem_usage.py @@ -29,7 +29,7 @@ # { # "model_name": "gpt-3.5-turbo", # openai model name # "litellm_params": { # params for litellm completion/embedding call -# "model": "azure/chatgpt-v-2", +# "model": "azure/chatgpt-v-3", # "api_key": os.getenv("AZURE_API_KEY"), # "api_version": os.getenv("AZURE_API_VERSION"), # "api_base": os.getenv("AZURE_API_BASE"), @@ -40,7 +40,7 @@ # { # "model_name": "bad-model", # openai model name # "litellm_params": { # params for litellm completion/embedding call -# "model": "azure/chatgpt-v-2", +# "model": "azure/chatgpt-v-3", # "api_key": "bad-key", # "api_version": os.getenv("AZURE_API_VERSION"), # "api_base": os.getenv("AZURE_API_BASE"), diff --git a/tests/local_testing/test_mock_request.py b/tests/local_testing/test_mock_request.py index 6842767d9d..6a9c5239f4 100644 --- a/tests/local_testing/test_mock_request.py +++ b/tests/local_testing/test_mock_request.py @@ -157,7 +157,7 @@ def test_router_mock_request_with_mock_timeout_with_fallbacks(): { "model_name": "azure-gpt", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), }, diff --git a/tests/local_testing/test_prometheus_service.py b/tests/local_testing/test_prometheus_service.py index c640532a07..cfbd6a1a83 100644 --- a/tests/local_testing/test_prometheus_service.py +++ b/tests/local_testing/test_prometheus_service.py @@ -104,12 +104,12 @@ async def test_router_with_caching(): model_list = [ { "model_name": "azure/gpt-4", - "litellm_params": get_azure_params("chatgpt-v-2"), + "litellm_params": get_azure_params("chatgpt-v-3"), "tpm": 100, }, { "model_name": "azure/gpt-4", - "litellm_params": get_azure_params("chatgpt-v-2"), + "litellm_params": get_azure_params("chatgpt-v-3"), "tpm": 1000, }, ] diff --git a/tests/local_testing/test_prompt_injection_detection.py b/tests/local_testing/test_prompt_injection_detection.py index c493a37227..8443aadcc6 100644 --- a/tests/local_testing/test_prompt_injection_detection.py +++ b/tests/local_testing/test_prompt_injection_detection.py @@ -107,7 +107,7 @@ async def test_prompt_injection_llm_eval(): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), diff --git a/tests/local_testing/test_provider_specific_config.py b/tests/local_testing/test_provider_specific_config.py index fc382bd3e9..8fc4c6ec21 100644 --- a/tests/local_testing/test_provider_specific_config.py +++ b/tests/local_testing/test_provider_specific_config.py @@ -729,7 +729,7 @@ def azure_openai_test_completion(): try: # OVERRIDE WITH DYNAMIC MAX TOKENS response_1 = litellm.completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[ { "content": "Hello, how are you? Be as verbose as possible", @@ -743,7 +743,7 @@ def azure_openai_test_completion(): # USE CONFIG TOKENS response_2 = litellm.completion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[ { "content": "Hello, how are you? Be as verbose as possible", diff --git a/tests/local_testing/test_router.py b/tests/local_testing/test_router.py index 13eaeb09ab..eb845559e2 100644 --- a/tests/local_testing/test_router.py +++ b/tests/local_testing/test_router.py @@ -266,7 +266,7 @@ def test_router_sensitive_keys(): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "special-key", }, "model_info": {"id": 12345}, @@ -334,7 +334,7 @@ async def test_router_retries(sync_mode): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), "api_version": os.getenv("AZURE_API_VERSION"), @@ -417,7 +417,7 @@ def test_exception_raising(): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -479,7 +479,7 @@ def test_reading_key_from_model_list(): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": old_api_key, "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -535,7 +535,7 @@ def test_reading_key_from_model_list(): def test_call_one_endpoint(): # [PROD TEST CASE] # user passes one deployment they want to call on the router, we call the specified one - # this test makes a completion calls azure/chatgpt-v-2, it should work + # this test makes a completion calls azure/chatgpt-v-3, it should work try: print("Testing calling a specific deployment") old_api_key = os.environ["AZURE_API_KEY"] @@ -544,7 +544,7 @@ def test_call_one_endpoint(): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": old_api_key, "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -574,7 +574,7 @@ def test_call_one_endpoint(): async def call_azure_completion(): response = await router.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[{"role": "user", "content": "hello this request will pass"}], specific_deployment=True, ) @@ -620,7 +620,7 @@ def test_router_azure_acompletion(): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": old_api_key, "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -793,7 +793,7 @@ def test_router_context_window_check_pre_call_check_in_group_custom_model_info() { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -847,7 +847,7 @@ def test_router_context_window_check_pre_call_check(): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -901,7 +901,7 @@ def test_router_context_window_check_pre_call_check_out_group(): { "model_name": "gpt-3.5-turbo-small", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -980,7 +980,7 @@ def test_router_region_pre_call_check(allowed_model_region): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -2616,7 +2616,7 @@ def test_is_team_specific_model(): # { # "model_name": "gpt-3.5-turbo", # "litellm_params": { -# "model": "azure/chatgpt-v-2", +# "model": "azure/chatgpt-v-3", # "api_key": os.getenv("AZURE_API_KEY"), # "api_base": os.getenv("AZURE_API_BASE"), # "tpm": 100000, @@ -2626,7 +2626,7 @@ def test_is_team_specific_model(): # { # "model_name": "gpt-3.5-turbo", # "litellm_params": { -# "model": "azure/chatgpt-v-2", +# "model": "azure/chatgpt-v-3", # "api_key": os.getenv("AZURE_API_KEY"), # "api_base": os.getenv("AZURE_API_BASE"), # "tpm": 500, diff --git a/tests/local_testing/test_router_budget_limiter.py b/tests/local_testing/test_router_budget_limiter.py index 8d4948f8f9..9c20b6d098 100644 --- a/tests/local_testing/test_router_budget_limiter.py +++ b/tests/local_testing/test_router_budget_limiter.py @@ -74,7 +74,7 @@ async def test_provider_budgets_e2e_test(): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -268,7 +268,7 @@ async def test_prometheus_metric_tracking(): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), diff --git a/tests/local_testing/test_router_caching.py b/tests/local_testing/test_router_caching.py index 53a79b9434..574f133ace 100644 --- a/tests/local_testing/test_router_caching.py +++ b/tests/local_testing/test_router_caching.py @@ -96,7 +96,7 @@ async def test_acompletion_caching_on_router(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), "api_version": os.getenv("AZURE_API_VERSION"), @@ -213,7 +213,7 @@ async def test_acompletion_caching_with_ttl_on_router(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), "api_version": os.getenv("AZURE_API_VERSION"), @@ -279,7 +279,7 @@ async def test_acompletion_caching_on_router_caching_groups(): { "model_name": "azure-gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), "api_version": os.getenv("AZURE_API_VERSION"), diff --git a/tests/local_testing/test_router_client_init.py b/tests/local_testing/test_router_client_init.py index 1440dfecaa..42fc49a4c7 100644 --- a/tests/local_testing/test_router_client_init.py +++ b/tests/local_testing/test_router_client_init.py @@ -43,7 +43,7 @@ async def test_router_init(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), "api_version": os.getenv("AZURE_API_VERSION"), diff --git a/tests/local_testing/test_router_cooldowns.py b/tests/local_testing/test_router_cooldowns.py index 80ceb33c01..8428200109 100644 --- a/tests/local_testing/test_router_cooldowns.py +++ b/tests/local_testing/test_router_cooldowns.py @@ -41,7 +41,7 @@ async def test_cooldown_badrequest_error(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), diff --git a/tests/local_testing/test_router_debug_logs.py b/tests/local_testing/test_router_debug_logs.py index ba59a3c2fd..bce09404d8 100644 --- a/tests/local_testing/test_router_debug_logs.py +++ b/tests/local_testing/test_router_debug_logs.py @@ -33,7 +33,7 @@ def test_async_fallbacks(caplog): { "model_name": "azure/gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -93,7 +93,7 @@ def test_async_fallbacks(caplog): # - error request, falling back notice, success notice expected_logs = [ "Falling back to model_group = azure/gpt-3.5-turbo", - "litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m", + "litellm.acompletion(model=azure/chatgpt-v-3)\x1b[32m 200 OK\x1b[0m", "Successful fallback b/w models.", ] diff --git a/tests/local_testing/test_router_fallbacks.py b/tests/local_testing/test_router_fallbacks.py index 576ad0fcaa..ced2c4dd9e 100644 --- a/tests/local_testing/test_router_fallbacks.py +++ b/tests/local_testing/test_router_fallbacks.py @@ -67,7 +67,7 @@ def test_sync_fallbacks(): { # list of model deployments "model_name": "azure/gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -78,7 +78,7 @@ def test_sync_fallbacks(): { # list of model deployments "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -150,7 +150,7 @@ async def test_async_fallbacks(): { # list of model deployments "model_name": "azure/gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -161,7 +161,7 @@ async def test_async_fallbacks(): { # list of model deployments "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -349,7 +349,7 @@ def test_dynamic_fallbacks_sync(): { # list of model deployments "model_name": "azure/gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -360,7 +360,7 @@ def test_dynamic_fallbacks_sync(): { # list of model deployments "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -426,7 +426,7 @@ async def test_dynamic_fallbacks_async(): { # list of model deployments "model_name": "azure/gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -437,7 +437,7 @@ async def test_dynamic_fallbacks_async(): { # list of model deployments "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -509,7 +509,7 @@ async def test_async_fallbacks_streaming(): { # list of model deployments "model_name": "azure/gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -520,7 +520,7 @@ async def test_async_fallbacks_streaming(): { # list of model deployments "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -594,7 +594,7 @@ def test_sync_fallbacks_streaming(): { # list of model deployments "model_name": "azure/gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -605,7 +605,7 @@ def test_sync_fallbacks_streaming(): { # list of model deployments "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -675,7 +675,7 @@ async def test_async_fallbacks_max_retries_per_request(): { # list of model deployments "model_name": "azure/gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -686,7 +686,7 @@ async def test_async_fallbacks_max_retries_per_request(): { # list of model deployments "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -808,13 +808,13 @@ def test_ausage_based_routing_fallbacks(): model_list = [ { "model_name": "azure/gpt-4-fast", - "litellm_params": get_azure_params("chatgpt-v-2"), + "litellm_params": get_azure_params("chatgpt-v-3"), "model_info": {"id": 1}, "rpm": AZURE_FAST_RPM, }, { "model_name": "azure/gpt-4-basic", - "litellm_params": get_azure_params("chatgpt-v-2"), + "litellm_params": get_azure_params("chatgpt-v-3"), "model_info": {"id": 2}, "rpm": AZURE_BASIC_RPM, }, @@ -889,7 +889,7 @@ def test_custom_cooldown_times(): { # list of model deployments "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -899,7 +899,7 @@ def test_custom_cooldown_times(): { # list of model deployments "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -993,7 +993,7 @@ async def test_service_unavailable_fallbacks(sync_mode): { "model_name": "gpt-3.5-turbo-0125-preview", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), diff --git a/tests/local_testing/test_router_get_deployments.py b/tests/local_testing/test_router_get_deployments.py index efbb5d16e7..ff88824d4a 100644 --- a/tests/local_testing/test_router_get_deployments.py +++ b/tests/local_testing/test_router_get_deployments.py @@ -41,7 +41,7 @@ def test_weighted_selection_router(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), "api_version": os.getenv("AZURE_API_VERSION"), @@ -54,7 +54,7 @@ def test_weighted_selection_router(): ) selection_counts = defaultdict(int) - # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time + # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time for _ in range(1000): selected_model = router.get_available_deployment("gpt-3.5-turbo") selected_model_id = selected_model["litellm_params"]["model"] @@ -64,10 +64,10 @@ def test_weighted_selection_router(): total_requests = sum(selection_counts.values()) - # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests + # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests assert ( - selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89 - ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}" + selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89 + ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}" router.reset() except Exception as e: @@ -97,7 +97,7 @@ def test_weighted_selection_router_tpm(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), "api_version": os.getenv("AZURE_API_VERSION"), @@ -110,7 +110,7 @@ def test_weighted_selection_router_tpm(): ) selection_counts = defaultdict(int) - # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time + # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time for _ in range(1000): selected_model = router.get_available_deployment("gpt-3.5-turbo") selected_model_id = selected_model["litellm_params"]["model"] @@ -120,10 +120,10 @@ def test_weighted_selection_router_tpm(): total_requests = sum(selection_counts.values()) - # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests + # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests assert ( - selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89 - ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}" + selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89 + ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}" router.reset() except Exception as e: @@ -153,7 +153,7 @@ def test_weighted_selection_router_tpm_as_router_param(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), "api_version": os.getenv("AZURE_API_VERSION"), @@ -166,7 +166,7 @@ def test_weighted_selection_router_tpm_as_router_param(): ) selection_counts = defaultdict(int) - # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time + # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time for _ in range(1000): selected_model = router.get_available_deployment("gpt-3.5-turbo") selected_model_id = selected_model["litellm_params"]["model"] @@ -176,10 +176,10 @@ def test_weighted_selection_router_tpm_as_router_param(): total_requests = sum(selection_counts.values()) - # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests + # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests assert ( - selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89 - ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}" + selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89 + ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}" router.reset() except Exception as e: @@ -210,7 +210,7 @@ def test_weighted_selection_router_rpm_as_router_param(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), "api_version": os.getenv("AZURE_API_VERSION"), @@ -224,7 +224,7 @@ def test_weighted_selection_router_rpm_as_router_param(): ) selection_counts = defaultdict(int) - # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time + # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time for _ in range(1000): selected_model = router.get_available_deployment("gpt-3.5-turbo") selected_model_id = selected_model["litellm_params"]["model"] @@ -234,10 +234,10 @@ def test_weighted_selection_router_rpm_as_router_param(): total_requests = sum(selection_counts.values()) - # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests + # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests assert ( - selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89 - ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}" + selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89 + ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}" router.reset() except Exception as e: @@ -266,7 +266,7 @@ def test_weighted_selection_router_no_rpm_set(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), "api_version": os.getenv("AZURE_API_VERSION"), @@ -286,7 +286,7 @@ def test_weighted_selection_router_no_rpm_set(): ) selection_counts = defaultdict(int) - # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time + # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time for _ in range(1000): selected_model = router.get_available_deployment("claude-1") selected_model_id = selected_model["litellm_params"]["model"] @@ -296,7 +296,7 @@ def test_weighted_selection_router_no_rpm_set(): total_requests = sum(selection_counts.values()) - # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests + # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests assert ( selection_counts["bedrock/claude1.2"] / total_requests == 1 ), f"Assertion failed: Selection counts {selection_counts}" @@ -325,7 +325,7 @@ def test_model_group_aliases(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), "api_version": os.getenv("AZURE_API_VERSION"), @@ -358,7 +358,7 @@ def test_model_group_aliases(): ) # test that - # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time + # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time selection_counts = defaultdict(int) for _ in range(1000): selected_model = router.get_available_deployment("gpt-3.5-turbo") @@ -369,10 +369,10 @@ def test_model_group_aliases(): total_requests = sum(selection_counts.values()) - # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests + # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests assert ( - selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89 - ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}" + selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89 + ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}" router.reset() except Exception as e: @@ -552,7 +552,7 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), "api_version": os.getenv("AZURE_API_VERSION"), @@ -566,7 +566,7 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list): ) selection_counts = defaultdict(int) - # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time + # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time for _ in range(1000): selected_model = await router.async_get_available_deployment( "gpt-3.5-turbo", request_kwargs={} @@ -579,13 +579,13 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list): total_requests = sum(selection_counts.values()) if rpm_list[0] is not None or tpm_list[0] is not None: - # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests + # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests assert ( - selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89 - ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}" + selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89 + ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}" else: # Assert both are used - assert selection_counts["azure/chatgpt-v-2"] > 0 + assert selection_counts["azure/chatgpt-v-3"] > 0 assert selection_counts["gpt-3.5-turbo"] > 0 router.reset() except Exception as e: diff --git a/tests/local_testing/test_router_init.py b/tests/local_testing/test_router_init.py index 00b2daa764..dd2d43dc26 100644 --- a/tests/local_testing/test_router_init.py +++ b/tests/local_testing/test_router_init.py @@ -40,7 +40,7 @@ # { # "model_name": "gpt-3.5-turbo", # "litellm_params": { -# "model": "azure/chatgpt-v-2", +# "model": "azure/chatgpt-v-3", # "api_key": os.getenv("AZURE_API_KEY"), # "api_version": os.getenv("AZURE_API_VERSION"), # "api_base": os.getenv("AZURE_API_BASE"), @@ -96,7 +96,7 @@ # { # "model_name": "gpt-3.5-turbo", # "litellm_params": { -# "model": "azure/chatgpt-v-2", +# "model": "azure/chatgpt-v-3", # "api_key": os.getenv("AZURE_API_KEY"), # "api_version": os.getenv("AZURE_API_VERSION"), # "api_base": os.getenv("AZURE_API_BASE"), @@ -134,7 +134,7 @@ # { # "model_name": "azure-cloudflare", # "litellm_params": { -# "model": "azure/chatgpt-v-2", +# "model": "azure/chatgpt-v-3", # "api_key": os.getenv("AZURE_API_KEY"), # "api_version": os.getenv("AZURE_API_VERSION"), # "api_base": "https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1", @@ -201,7 +201,7 @@ # { # "model_name": "gpt-3.5-turbo", # "litellm_params": { -# "model": "azure/chatgpt-v-2", +# "model": "azure/chatgpt-v-3", # "api_key": os.getenv("AZURE_API_KEY"), # "api_version": os.getenv("AZURE_API_VERSION"), # "api_base": os.getenv("AZURE_API_BASE"), @@ -254,7 +254,7 @@ # { # "model_name": "gpt-3.5-turbo", # "litellm_params": { -# "model": "azure/chatgpt-v-2", +# "model": "azure/chatgpt-v-3", # "api_key": os.getenv("AZURE_API_KEY"), # "api_version": os.getenv("AZURE_API_VERSION"), # "api_base": os.getenv("AZURE_API_BASE"), @@ -615,7 +615,7 @@ # { # "model_name": "gpt-3.5-turbo", # "litellm_params": { -# "model": "azure/chatgpt-v-2", +# "model": "azure/chatgpt-v-3", # "api_key": os.getenv("AZURE_API_KEY"), # "api_version": os.getenv("AZURE_API_VERSION"), # "api_base": os.getenv("AZURE_API_BASE"), @@ -660,7 +660,7 @@ # { # "model_name": "gpt-3.5-turbo", # "litellm_params": { -# "model": "azure/chatgpt-v-2", +# "model": "azure/chatgpt-v-3", # "api_key": os.getenv("AZURE_API_KEY"), # "api_version": os.getenv("AZURE_API_VERSION"), # "api_base": os.getenv("AZURE_API_BASE"), diff --git a/tests/local_testing/test_router_policy_violation.py b/tests/local_testing/test_router_policy_violation.py index 52f50eb591..1e72868db6 100644 --- a/tests/local_testing/test_router_policy_violation.py +++ b/tests/local_testing/test_router_policy_violation.py @@ -69,7 +69,7 @@ async def test_async_fallbacks(): { # list of model deployments "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), diff --git a/tests/local_testing/test_router_retries.py b/tests/local_testing/test_router_retries.py index 12bd71cfd1..d028010afa 100644 --- a/tests/local_testing/test_router_retries.py +++ b/tests/local_testing/test_router_retries.py @@ -166,7 +166,7 @@ async def test_router_retry_policy(error_type): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -175,7 +175,7 @@ async def test_router_retry_policy(error_type): { "model_name": "bad-model", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -275,7 +275,7 @@ async def test_dynamic_router_retry_policy(model_group): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -287,7 +287,7 @@ async def test_dynamic_router_retry_policy(model_group): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -299,7 +299,7 @@ async def test_dynamic_router_retry_policy(model_group): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -311,7 +311,7 @@ async def test_dynamic_router_retry_policy(model_group): { "model_name": "bad-model", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -393,7 +393,7 @@ def test_retry_rate_limit_error_with_healthy_deployments(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -426,7 +426,7 @@ def test_do_retry_rate_limit_error_with_no_fallbacks_and_no_healthy_deployments( { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -459,14 +459,14 @@ def test_raise_context_window_exceeded_error(): llm_provider="azure", model="gpt-3.5-turbo", ) - context_window_fallbacks = [{"gpt-3.5-turbo": ["azure/chatgpt-v-2"]}] + context_window_fallbacks = [{"gpt-3.5-turbo": ["azure/chatgpt-v-3"]}] router = Router( model_list=[ { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -508,7 +508,7 @@ def test_raise_context_window_exceeded_error_no_retry(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -562,7 +562,7 @@ def test_timeout_for_rate_limit_error_with_healthy_deployments( { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -589,7 +589,7 @@ def test_timeout_for_rate_limit_error_with_healthy_deployments( "litellm_params": { "api_key": "my-key", "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com", - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", }, "model_info": { "id": "0e30bc8a63fa91ae4415d4234e231b3f9e6dd900cac57d118ce13a720d95e9d6", @@ -615,7 +615,7 @@ def test_timeout_for_rate_limit_error_with_no_healthy_deployments(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -650,7 +650,7 @@ def test_no_retry_for_not_found_error_404(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -709,7 +709,7 @@ def test_no_retry_when_no_healthy_deployments(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), diff --git a/tests/local_testing/test_router_timeout.py b/tests/local_testing/test_router_timeout.py index 3f149a4342..c8d7502eee 100644 --- a/tests/local_testing/test_router_timeout.py +++ b/tests/local_testing/test_router_timeout.py @@ -30,7 +30,7 @@ def test_router_timeouts(): { "model_name": "openai-gpt-4", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "os.environ/AZURE_API_KEY", "api_base": "os.environ/AZURE_API_BASE", "api_version": "os.environ/AZURE_API_VERSION", diff --git a/tests/local_testing/test_router_utils.py b/tests/local_testing/test_router_utils.py index 067aaf032a..cd26f8ad60 100644 --- a/tests/local_testing/test_router_utils.py +++ b/tests/local_testing/test_router_utils.py @@ -32,7 +32,7 @@ def test_returned_settings(): { "model_name": "gpt-3.5-turbo", # openai model name "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -96,7 +96,7 @@ def test_update_kwargs_before_fallbacks_unit_test(): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), @@ -133,7 +133,7 @@ async def test_update_kwargs_before_fallbacks(call_type): { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "bad-key", "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), diff --git a/tests/local_testing/test_streaming.py b/tests/local_testing/test_streaming.py index 1f0730ef29..78226d0eab 100644 --- a/tests/local_testing/test_streaming.py +++ b/tests/local_testing/test_streaming.py @@ -241,7 +241,7 @@ tools_schema = [ def test_completion_azure_stream_special_char(): litellm.set_verbose = True messages = [{"role": "user", "content": "hi. respond with the tag only"}] - response = completion(model="azure/chatgpt-v-2", messages=messages, stream=True) + response = completion(model="azure/chatgpt-v-3", messages=messages, stream=True) response_str = "" for part in response: response_str += part.choices[0].delta.content or "" @@ -449,7 +449,7 @@ def test_completion_azure_stream(): }, ] response = completion( - model="azure/chatgpt-v-2", messages=messages, stream=True, max_tokens=50 + model="azure/chatgpt-v-3", messages=messages, stream=True, max_tokens=50 ) complete_response = "" # Add any assertions here to check the response @@ -2070,7 +2070,7 @@ def test_openai_chat_completion_complete_response_call(): "model", [ "gpt-3.5-turbo", - "azure/chatgpt-v-2", + "azure/chatgpt-v-3", "claude-3-haiku-20240307", "o1-preview", "o1", diff --git a/tests/local_testing/test_timeout.py b/tests/local_testing/test_timeout.py index b74cf89eaa..9342e789b4 100644 --- a/tests/local_testing/test_timeout.py +++ b/tests/local_testing/test_timeout.py @@ -23,7 +23,7 @@ import litellm [ ("gpt-3.5-turbo", "openai"), ("anthropic.claude-instant-v1", "bedrock"), - ("azure/chatgpt-v-2", "azure"), + ("azure/chatgpt-v-3", "azure"), ], ) @pytest.mark.parametrize("sync_mode", [True, False]) @@ -104,7 +104,7 @@ def test_hanging_request_azure(): { "model_name": "azure-gpt", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_base": os.environ["AZURE_API_BASE"], "api_key": os.environ["AZURE_API_KEY"], }, @@ -158,7 +158,7 @@ def test_hanging_request_openai(): { "model_name": "azure-gpt", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_base": os.environ["AZURE_API_BASE"], "api_key": os.environ["AZURE_API_KEY"], }, diff --git a/tests/local_testing/test_tpm_rpm_routing_v2.py b/tests/local_testing/test_tpm_rpm_routing_v2.py index d2b951a187..57443bbe4c 100644 --- a/tests/local_testing/test_tpm_rpm_routing_v2.py +++ b/tests/local_testing/test_tpm_rpm_routing_v2.py @@ -45,7 +45,7 @@ def test_tpm_rpm_updated(): ) model_group = "gpt-3.5-turbo" deployment_id = "1234" - deployment = "azure/chatgpt-v-2" + deployment = "azure/chatgpt-v-3" total_tokens = 50 standard_logging_payload: StandardLoggingPayload = create_standard_logging_payload() standard_logging_payload["model_group"] = model_group @@ -100,12 +100,12 @@ def test_get_available_deployments(): model_list = [ { "model_name": "gpt-3.5-turbo", - "litellm_params": {"model": "azure/chatgpt-v-2"}, + "litellm_params": {"model": "azure/chatgpt-v-3"}, "model_info": {"id": "1234"}, }, { "model_name": "gpt-3.5-turbo", - "litellm_params": {"model": "azure/chatgpt-v-2"}, + "litellm_params": {"model": "azure/chatgpt-v-3"}, "model_info": {"id": "5678"}, }, ] @@ -116,7 +116,7 @@ def test_get_available_deployments(): ## DEPLOYMENT 1 ## total_tokens = 50 deployment_id = "1234" - deployment = "azure/chatgpt-v-2" + deployment = "azure/chatgpt-v-3" standard_logging_payload = create_standard_logging_payload() standard_logging_payload["model_group"] = model_group standard_logging_payload["model_id"] = deployment_id @@ -721,7 +721,7 @@ async def test_tpm_rpm_routing_model_name_checks(): deployment = { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), "mock_response": "Hey, how's it going?", @@ -763,5 +763,5 @@ async def test_tpm_rpm_routing_model_name_checks(): assert ( standard_logging_payload["hidden_params"]["litellm_model_name"] - == "azure/chatgpt-v-2" + == "azure/chatgpt-v-3" ) diff --git a/tests/logging_callback_tests/test_alerting.py b/tests/logging_callback_tests/test_alerting.py index fc2eae00f7..26a5e0822f 100644 --- a/tests/logging_callback_tests/test_alerting.py +++ b/tests/logging_callback_tests/test_alerting.py @@ -56,7 +56,7 @@ def test_get_api_base_unit_test(model, optional_params, expected_api_base): async def test_get_api_base(): _pl = ProxyLogging(user_api_key_cache=DualCache()) _pl.update_values(alerting=["slack"], alerting_threshold=100, redis_cache=None) - model = "chatgpt-v-2" + model = "chatgpt-v-3" messages = [{"role": "user", "content": "Hey how's it going?"}] litellm_params = { "acompletion": True, diff --git a/tests/logging_callback_tests/test_amazing_s3_logs.py b/tests/logging_callback_tests/test_amazing_s3_logs.py index 17efb177d0..915041e714 100644 --- a/tests/logging_callback_tests/test_amazing_s3_logs.py +++ b/tests/logging_callback_tests/test_amazing_s3_logs.py @@ -244,7 +244,7 @@ async def make_async_calls(): for _ in range(5): task = asyncio.create_task( litellm.acompletion( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[{"role": "user", "content": "This is a test"}], max_tokens=5, temperature=0.7, diff --git a/tests/logging_callback_tests/test_spend_logs.py b/tests/logging_callback_tests/test_spend_logs.py index 972e636b48..d592931f25 100644 --- a/tests/logging_callback_tests/test_spend_logs.py +++ b/tests/logging_callback_tests/test_spend_logs.py @@ -40,7 +40,7 @@ def test_spend_logs_payload(model_id: Optional[str]): input_args: dict = { "kwargs": { - "model": "chatgpt-v-2", + "model": "chatgpt-v-3", "messages": [ {"role": "system", "content": "you are a helpful assistant.\n"}, {"role": "user", "content": "bom dia"}, @@ -89,7 +89,7 @@ def test_spend_logs_payload(model_id: Optional[str]): }, "endpoint": "http://localhost:4000/chat/completions", "model_group": "gpt-3.5-turbo", - "deployment": "azure/chatgpt-v-2", + "deployment": "azure/chatgpt-v-3", "model_info": { "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4", "db_model": False, @@ -99,7 +99,7 @@ def test_spend_logs_payload(model_id: Optional[str]): "error_information": None, "status": "success", "proxy_server_request": "{}", - "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n", + "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n", }, "model_info": { "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4", @@ -158,7 +158,7 @@ def test_spend_logs_payload(model_id: Optional[str]): "api_base": "openai-gpt-4-test-v-1.openai.azure.com", "acompletion": True, "complete_input_dict": { - "model": "chatgpt-v-2", + "model": "chatgpt-v-3", "messages": [ {"role": "system", "content": "you are a helpful assistant.\n"}, {"role": "user", "content": "bom dia"}, diff --git a/tests/old_proxy_tests/tests/load_test_q.py b/tests/old_proxy_tests/tests/load_test_q.py index 17fa185215..a0e22eda5a 100644 --- a/tests/old_proxy_tests/tests/load_test_q.py +++ b/tests/old_proxy_tests/tests/load_test_q.py @@ -25,7 +25,7 @@ config = { { "model_name": "gpt-3.5-turbo", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.environ["AZURE_API_KEY"], "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/", "api_version": "2023-07-01-preview", diff --git a/tests/old_proxy_tests/tests/test_langchain_request.py b/tests/old_proxy_tests/tests/test_langchain_request.py index e94a077cc8..901edd783a 100644 --- a/tests/old_proxy_tests/tests/test_langchain_request.py +++ b/tests/old_proxy_tests/tests/test_langchain_request.py @@ -9,7 +9,7 @@ # chat = ChatOpenAI( # openai_api_base="http://0.0.0.0:8000", -# model = "azure/chatgpt-v-2", +# model = "azure/chatgpt-v-3", # temperature=0.1, # extra_body={ # "metadata": { diff --git a/tests/old_proxy_tests/tests/test_openai_exception_request.py b/tests/old_proxy_tests/tests/test_openai_exception_request.py index 46090e1c89..68b8997766 100644 --- a/tests/old_proxy_tests/tests/test_openai_exception_request.py +++ b/tests/old_proxy_tests/tests/test_openai_exception_request.py @@ -39,7 +39,7 @@ client = openai.AzureOpenAI( ) try: response = client.chat.completions.create( - model="chatgpt-v-2", + model="chatgpt-v-3", messages=[ { "role": "user", diff --git a/tests/old_proxy_tests/tests/test_openai_request.py b/tests/old_proxy_tests/tests/test_openai_request.py index bb7bf22687..41b8c43f2d 100644 --- a/tests/old_proxy_tests/tests/test_openai_request.py +++ b/tests/old_proxy_tests/tests/test_openai_request.py @@ -4,7 +4,7 @@ client = openai.OpenAI(api_key="hi", base_url="http://0.0.0.0:8000") # # request sent to model set on litellm proxy, `litellm --model` response = client.chat.completions.create( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[ {"role": "user", "content": "this is a test request, write a short poem"} ], diff --git a/tests/proxy_unit_tests/example_config_yaml/azure_config.yaml b/tests/proxy_unit_tests/example_config_yaml/azure_config.yaml index fd5865cd7c..111813c884 100644 --- a/tests/proxy_unit_tests/example_config_yaml/azure_config.yaml +++ b/tests/proxy_unit_tests/example_config_yaml/azure_config.yaml @@ -1,7 +1,7 @@ model_list: - model_name: gpt-4-team1 litellm_params: - model: azure/chatgpt-v-2 + model: azure/chatgpt-v-3 api_base: https://openai-gpt-4-test-v-1.openai.azure.com/ api_version: "2023-05-15" api_key: os.environ/AZURE_API_KEY diff --git a/tests/proxy_unit_tests/test_configs/test_bad_config.yaml b/tests/proxy_unit_tests/test_configs/test_bad_config.yaml index 7c802a8408..0a16ecb3c5 100644 --- a/tests/proxy_unit_tests/test_configs/test_bad_config.yaml +++ b/tests/proxy_unit_tests/test_configs/test_bad_config.yaml @@ -5,12 +5,12 @@ model_list: model: gpt-3.5-turbo - model_name: working-azure-gpt-3.5-turbo litellm_params: - model: azure/chatgpt-v-2 + model: azure/chatgpt-v-3 api_base: os.environ/AZURE_API_BASE api_key: os.environ/AZURE_API_KEY - model_name: azure-gpt-3.5-turbo litellm_params: - model: azure/chatgpt-v-2 + model: azure/chatgpt-v-3 api_base: os.environ/AZURE_API_BASE api_key: bad-key - model_name: azure-embedding diff --git a/tests/proxy_unit_tests/test_configs/test_cloudflare_azure_with_cache_config.yaml b/tests/proxy_unit_tests/test_configs/test_cloudflare_azure_with_cache_config.yaml index c3c3cb1c32..aeadbeb872 100644 --- a/tests/proxy_unit_tests/test_configs/test_cloudflare_azure_with_cache_config.yaml +++ b/tests/proxy_unit_tests/test_configs/test_cloudflare_azure_with_cache_config.yaml @@ -1,7 +1,7 @@ model_list: - model_name: azure-cloudflare litellm_params: - model: azure/chatgpt-v-2 + model: azure/chatgpt-v-3 api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1 api_key: os.environ/AZURE_API_KEY api_version: 2023-07-01-preview diff --git a/tests/proxy_unit_tests/test_configs/test_config_no_auth.yaml b/tests/proxy_unit_tests/test_configs/test_config_no_auth.yaml index 1c5ddf2266..075bf7a09d 100644 --- a/tests/proxy_unit_tests/test_configs/test_config_no_auth.yaml +++ b/tests/proxy_unit_tests/test_configs/test_config_no_auth.yaml @@ -12,7 +12,7 @@ model_list: - litellm_params: api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1 api_key: os.environ/AZURE_API_KEY - model: azure/chatgpt-v-2 + model: azure/chatgpt-v-3 model_name: azure-cloudflare-model - litellm_params: api_base: https://openai-france-1234.openai.azure.com diff --git a/tests/proxy_unit_tests/test_configs/test_custom_logger.yaml b/tests/proxy_unit_tests/test_configs/test_custom_logger.yaml index 145c618edd..2ad500b36f 100644 --- a/tests/proxy_unit_tests/test_configs/test_custom_logger.yaml +++ b/tests/proxy_unit_tests/test_configs/test_custom_logger.yaml @@ -1,7 +1,7 @@ model_list: - model_name: Azure OpenAI GPT-4 Canada litellm_params: - model: azure/chatgpt-v-2 + model: azure/chatgpt-v-3 api_base: os.environ/AZURE_API_BASE api_key: os.environ/AZURE_API_KEY api_version: "2023-07-01-preview" diff --git a/tests/proxy_unit_tests/test_key_generate_prisma.py b/tests/proxy_unit_tests/test_key_generate_prisma.py index d904de13b4..98cccccc79 100644 --- a/tests/proxy_unit_tests/test_key_generate_prisma.py +++ b/tests/proxy_unit_tests/test_key_generate_prisma.py @@ -1546,7 +1546,7 @@ def test_call_with_key_over_budget(prisma_client): ) await proxy_db_logger._PROXY_track_cost_callback( kwargs={ - "model": "chatgpt-v-2", + "model": "chatgpt-v-3", "stream": False, "litellm_params": { "metadata": { @@ -1578,7 +1578,7 @@ def test_call_with_key_over_budget(prisma_client): assert spend_log.request_id == request_id assert spend_log.spend == float("2e-05") - assert spend_log.model == "chatgpt-v-2" + assert spend_log.model == "chatgpt-v-3" assert ( spend_log.cache_key == "c891d64397a472e6deb31b87a5ac4d3ed5b2dcc069bc87e2afe91e6d64e95a1e" @@ -1669,7 +1669,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client): proxy_db_logger = _ProxyDBLogger() await proxy_db_logger._PROXY_track_cost_callback( kwargs={ - "model": "chatgpt-v-2", + "model": "chatgpt-v-3", "stream": False, "litellm_params": { "metadata": { @@ -1702,7 +1702,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client): assert spend_log.request_id == request_id assert spend_log.spend == float("2e-05") - assert spend_log.model == "chatgpt-v-2" + assert spend_log.model == "chatgpt-v-3" assert ( spend_log.cache_key == "c891d64397a472e6deb31b87a5ac4d3ed5b2dcc069bc87e2afe91e6d64e95a1e" @@ -1757,7 +1757,7 @@ async def test_call_with_key_over_model_budget( try: - # set budget for chatgpt-v-2 to 0.000001, expect the next request to fail + # set budget for chatgpt-v-3 to 0.000001, expect the next request to fail model_max_budget = { "gpt-4o-mini": { "budget_limit": "0.000001", @@ -1898,7 +1898,7 @@ async def test_call_with_key_never_over_budget(prisma_client): ) await proxy_db_logger._PROXY_track_cost_callback( kwargs={ - "model": "chatgpt-v-2", + "model": "chatgpt-v-3", "stream": False, "litellm_params": { "metadata": { @@ -1987,7 +1987,7 @@ async def test_call_with_key_over_budget_stream(prisma_client): await proxy_db_logger._PROXY_track_cost_callback( kwargs={ "call_type": "acompletion", - "model": "sagemaker-chatgpt-v-2", + "model": "sagemaker-chatgpt-v-3", "stream": True, "complete_streaming_response": resp, "litellm_params": { @@ -2431,7 +2431,7 @@ async def track_cost_callback_helper_fn(generated_key: str, user_id: str): await proxy_db_logger._PROXY_track_cost_callback( kwargs={ "call_type": "acompletion", - "model": "sagemaker-chatgpt-v-2", + "model": "sagemaker-chatgpt-v-3", "stream": True, "complete_streaming_response": resp, "litellm_params": { diff --git a/tests/proxy_unit_tests/test_proxy_custom_logger.py b/tests/proxy_unit_tests/test_proxy_custom_logger.py index ad60335152..bdad7c9d7d 100644 --- a/tests/proxy_unit_tests/test_proxy_custom_logger.py +++ b/tests/proxy_unit_tests/test_proxy_custom_logger.py @@ -164,7 +164,7 @@ def test_chat_completion(client): my_custom_logger.async_success == True ) # checks if the status of async_success is True, only the async_log_success_event can set this to true assert ( - my_custom_logger.async_completion_kwargs["model"] == "chatgpt-v-2" + my_custom_logger.async_completion_kwargs["model"] == "chatgpt-v-3" ) # checks if kwargs passed to async_log_success_event are correct print( "\n\n Custom Logger Async Completion args", diff --git a/tests/proxy_unit_tests/test_proxy_pass_user_config.py b/tests/proxy_unit_tests/test_proxy_pass_user_config.py index 12def1160f..3ecc252264 100644 --- a/tests/proxy_unit_tests/test_proxy_pass_user_config.py +++ b/tests/proxy_unit_tests/test_proxy_pass_user_config.py @@ -64,7 +64,7 @@ def test_chat_completion(client_no_auth): ModelConfig( model_name="user-azure-instance", litellm_params=CompletionRequest( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", api_key=os.getenv("AZURE_API_KEY"), api_version=os.getenv("AZURE_API_VERSION"), api_base=os.getenv("AZURE_API_BASE"), diff --git a/tests/proxy_unit_tests/test_proxy_server.py b/tests/proxy_unit_tests/test_proxy_server.py index 68f4ff8ec4..dda39d2bd5 100644 --- a/tests/proxy_unit_tests/test_proxy_server.py +++ b/tests/proxy_unit_tests/test_proxy_server.py @@ -446,7 +446,7 @@ def test_chat_completion_azure(mock_acompletion, client_no_auth): try: # Your test data test_data = { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "messages": [ {"role": "user", "content": "write 1 sentence poem"}, ], @@ -457,7 +457,7 @@ def test_chat_completion_azure(mock_acompletion, client_no_auth): response = client_no_auth.post("/v1/chat/completions", json=test_data) mock_acompletion.assert_called_once_with( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[ {"role": "user", "content": "write 1 sentence poem"}, ], @@ -489,19 +489,19 @@ def test_openai_deployments_model_chat_completions_azure( try: # Your test data test_data = { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "messages": [ {"role": "user", "content": "write 1 sentence poem"}, ], "max_tokens": 10, } - url = "/openai/deployments/azure/chatgpt-v-2/chat/completions" + url = "/openai/deployments/azure/chatgpt-v-3/chat/completions" print(f"testing proxy server with Azure Request {url}") response = client_no_auth.post(url, json=test_data) mock_acompletion.assert_called_once_with( - model="azure/chatgpt-v-2", + model="azure/chatgpt-v-3", messages=[ {"role": "user", "content": "write 1 sentence poem"}, ], @@ -1314,7 +1314,7 @@ async def test_add_callback_via_key(prisma_client): try: # Your test data test_data = { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "messages": [ {"role": "user", "content": "write 1 sentence poem"}, ], @@ -1408,7 +1408,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils( request._url = URL(url="/chat/completions") test_data = { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "messages": [ {"role": "user", "content": "write 1 sentence poem"}, ], @@ -1423,7 +1423,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils( data = { "data": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "messages": [{"role": "user", "content": "write 1 sentence poem"}], "max_tokens": 10, "mock_response": "Hello world", @@ -1523,7 +1523,7 @@ async def test_disable_fallbacks_by_key(disable_fallbacks_set): key_metadata = {"disable_fallbacks": disable_fallbacks_set} existing_data = { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "messages": [{"role": "user", "content": "write 1 sentence poem"}], } data = LiteLLMProxyRequestSetup.add_key_level_controls( @@ -1564,7 +1564,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_gcs_bucket( request._url = URL(url="/chat/completions") test_data = { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "messages": [ {"role": "user", "content": "write 1 sentence poem"}, ], @@ -1579,7 +1579,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_gcs_bucket( data = { "data": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "messages": [{"role": "user", "content": "write 1 sentence poem"}], "max_tokens": 10, "mock_response": "Hello world", @@ -1697,7 +1697,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_langsmith( request._url = URL(url="/chat/completions") test_data = { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "messages": [ {"role": "user", "content": "write 1 sentence poem"}, ], @@ -1712,7 +1712,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_langsmith( data = { "data": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "messages": [{"role": "user", "content": "write 1 sentence poem"}], "max_tokens": 10, "mock_response": "Hello world", diff --git a/tests/proxy_unit_tests/test_proxy_server_keys.py b/tests/proxy_unit_tests/test_proxy_server_keys.py index 6eb41202cd..8b8e943ba7 100644 --- a/tests/proxy_unit_tests/test_proxy_server_keys.py +++ b/tests/proxy_unit_tests/test_proxy_server_keys.py @@ -171,7 +171,7 @@ # model_data = { # "model_name": "azure-model", # "litellm_params": { -# "model": "azure/chatgpt-v-2", +# "model": "azure/chatgpt-v-3", # "api_key": os.getenv("AZURE_API_KEY"), # "api_base": os.getenv("AZURE_API_BASE"), # "api_version": os.getenv("AZURE_API_VERSION") diff --git a/tests/test_models.py b/tests/test_models.py index 31e564a829..89944c07b3 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -67,7 +67,7 @@ async def add_models(session, model_id="123", model_name="azure-gpt-3.5", key="s data = { "model_name": model_name, "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "os.environ/AZURE_API_KEY", "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/", "api_version": "2023-05-15", @@ -100,7 +100,7 @@ async def update_model(session, model_id="123", model_name="azure-gpt-3.5", key= data = { "model_name": model_name, "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": "os.environ/AZURE_API_KEY", "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/", "api_version": "2023-05-15", @@ -292,7 +292,7 @@ async def add_model_for_health_checking(session, model_id="123"): data = { "model_name": f"azure-model-health-check-{model_id}", "litellm_params": { - "model": "azure/chatgpt-v-2", + "model": "azure/chatgpt-v-3", "api_key": os.getenv("AZURE_API_KEY"), "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/", "api_version": "2023-05-15", @@ -417,7 +417,7 @@ async def test_add_model_run_health(): assert _health_info["healthy_count"] == 1 assert ( - _healthy_endpooint["model"] == "azure/chatgpt-v-2" + _healthy_endpooint["model"] == "azure/chatgpt-v-3" ) # this is the model that got added # assert httpx client is is unchanges From 3e87ec4f16e8b2d722b3e0cae5ef77812dd63fe7 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 18 Apr 2025 14:23:16 -0700 Subject: [PATCH 5/5] test: replace removed fireworks ai models --- tests/local_testing/test_completion_cost.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py index 3e30041489..3f2be26036 100644 --- a/tests/local_testing/test_completion_cost.py +++ b/tests/local_testing/test_completion_cost.py @@ -1284,7 +1284,7 @@ from litellm.llms.fireworks_ai.cost_calculator import get_base_model_for_pricing "model, base_model", [ ("fireworks_ai/llama-v3p1-405b-instruct", "fireworks-ai-default"), - ("fireworks_ai/mixtral-8x7b-instruct", "fireworks-ai-moe-up-to-56b"), + ("fireworks_ai/llama4-maverick-instruct-basic", "fireworks-ai-moe-up-to-56b"), ], ) def test_get_model_params_fireworks_ai(model, base_model): @@ -1294,7 +1294,7 @@ def test_get_model_params_fireworks_ai(model, base_model): @pytest.mark.parametrize( "model", - ["fireworks_ai/llama-v3p1-405b-instruct", "fireworks_ai/mixtral-8x7b-instruct"], + ["fireworks_ai/llama-v3p1-405b-instruct", "fireworks_ai/llama4-maverick-instruct-basic"], ) def test_completion_cost_fireworks_ai(model): os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"