feat: update Cerebras inference provider to support dynamic model listing

- update Cerebras to use OpenAIMixin
- enable openai completions tests
- enable openai chat completions tests
- disable tests with n > 1
- add recording for --setup cerebras --subdirs inference --pattern openai

test with: `./scripts/integration-tests.sh --stack-config server:ci-tests --setup cerebras --subdirs inference --pattern openai`
This commit is contained in:
Matthew Farrellee 2025-09-18 06:34:31 -04:00
parent 521865c388
commit 1f7e87c647
16 changed files with 3369 additions and 14 deletions

View file

@@ -40,7 +40,6 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
"inline::sentence-transformers",
"inline::vllm",
"remote::bedrock",
"remote::cerebras",
"remote::databricks",
# Technically Nvidia does support OpenAI completions, but none of their hosted models
# support both completions and chat completions endpoint and all the Llama models are
@@ -98,6 +97,8 @@ def skip_if_doesnt_support_n(client_with_models, model_id):
# the entered value was 2. Update the candidateCount value and try again.', 'status': 'INVALID_ARGUMENT'}
"remote::tgi", # TGI ignores n param silently
"remote::together", # `n` > 1 is not supported when streaming tokens. Please disable `stream`
# Error code 400 - {'message': '"n" > 1 is not currently supported', 'type': 'invalid_request_error', 'param': 'n', 'code': 'wrong_api_format'}
"remote::cerebras",
):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")
@@ -109,7 +110,6 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
"inline::sentence-transformers",
"inline::vllm",
"remote::bedrock",
"remote::cerebras",
"remote::databricks",
"remote::runpod",
"remote::watsonx", # watsonx returns 404 when hitting the /openai/v1 endpoint