feat: update Cerebras inference provider to support dynamic model listing

- update Cerebras to use OpenAIMixin
- enable openai completions tests
- enable openai chat completions tests
- disable tests that use n > 1
- add recording for --setup cerebras --subdirs inference --pattern openai

test with: `./scripts/integration-tests.sh --stack-config server:ci-tests --setup cerebras --subdirs inference --pattern openai`
2025-10-04 12:07:34 +00:00 · 2025-09-18 06:34:31 -04:00 · 2025-09-18 06:34:31 -04:00 · 1f7e87c647
commit 1f7e87c647
parent 521865c388
16 changed files with 3369 additions and 14 deletions
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@@ -40,7 +40,6 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
        "inline::sentence-transformers",
        "inline::vllm",
        "remote::bedrock",
-        "remote::cerebras",
        "remote::databricks",
        # Technically Nvidia does support OpenAI completions, but none of their hosted models
        # support both completions and chat completions endpoint and all the Llama models are
@@ -98,6 +97,8 @@ def skip_if_doesnt_support_n(client_with_models, model_id):
        #  the entered value was 2. Update the candidateCount value and try again.', 'status': 'INVALID_ARGUMENT'}
        "remote::tgi",  # TGI ignores n param silently
        "remote::together",  # `n` > 1 is not supported when streaming tokens. Please disable `stream`
+        # Error code 400 - {'message': '"n" > 1 is not currently supported', 'type': 'invalid_request_error', 'param': 'n', 'code': 'wrong_api_format'}
+        "remote::cerebras",
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")

@@ -109,7 +110,6 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
        "inline::sentence-transformers",
        "inline::vllm",
        "remote::bedrock",
-        "remote::cerebras",
        "remote::databricks",
        "remote::runpod",
        "remote::watsonx",  # watsonx returns 404 when hitting the /openai/v1 endpoint