diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 55052761c7..760a7c7842 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -7058,6 +7058,17 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models",
         "supports_tool_choice": true
     },
+    "command-a-03-2025": {
+        "max_tokens": 8000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 8000,
+        "input_cost_per_token": 0.0000025,
+        "output_cost_per_token": 0.00001,
+        "litellm_provider": "cohere_chat",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": true
+    },
     "command-r": {
         "max_tokens": 4096,
         "max_input_tokens": 128000,
diff --git a/tests/openai_endpoints_tests/test_e2e_openai_responses_api.py b/tests/openai_endpoints_tests/test_e2e_openai_responses_api.py
index f87444141e..1da0e740a9 100644
--- a/tests/openai_endpoints_tests/test_e2e_openai_responses_api.py
+++ b/tests/openai_endpoints_tests/test_e2e_openai_responses_api.py
@@ -73,7 +73,7 @@ def validate_stream_chunk(chunk):
 
 def test_basic_response():
     client = get_test_client()
     response = client.responses.create(
-        model="gpt-4.0", input="just respond with the word 'ping'"
+        model="gpt-4o", input="just respond with the word 'ping'"
     )
     print("basic response=", response)
@@ -97,7 +97,7 @@ def test_basic_response():
 
 def test_streaming_response():
     client = get_test_client()
     stream = client.responses.create(
-        model="gpt-4.0", input="just respond with the word 'ping'", stream=True
+        model="gpt-4o", input="just respond with the word 'ping'", stream=True
     )
     collected_chunks = []
@@ -120,5 +120,5 @@ def test_bad_request_bad_param_error():
     with pytest.raises(BadRequestError):
-        # Trigger error with invalid model name
+        # Trigger error with an out-of-range temperature value
         client.responses.create(
-            model="gpt-4.0", input="This should fail", temperature=2000
+            model="gpt-4o", input="This should fail", temperature=2000
         )
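Reviewer note: the sketch below shows one way the new "command-a-03-2025" entry could be exercised through litellm. It is a minimal illustration, not part of this diff: it assumes a valid COHERE_API_KEY in the environment, and that the "cohere_chat/" provider prefix and the litellm.cost_per_token helper behave as in current litellm releases.

import litellm

# Route through the Cohere chat provider, matching "litellm_provider" in the new entry.
response = litellm.completion(
    model="cohere_chat/command-a-03-2025",
    messages=[{"role": "user", "content": "just respond with the word 'ping'"}],
)
print(response.choices[0].message.content)

# Sanity-check the pricing fields: 0.0000025 USD/token input and 0.00001 USD/token
# output work out to $2.50 and $10.00 per million tokens respectively.
prompt_cost, completion_cost = litellm.cost_per_token(
    model="command-a-03-2025", prompt_tokens=1_000_000, completion_tokens=1_000_000
)
print(prompt_cost, completion_cost)  # expected: 2.5 10.0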