feat: update Cerebras inference provider to support dynamic model listing (#3481)

# What does this PR do?

- update Cerebras to use OpenAIMixin
- enable openai completions tests
- enable openai chat completions tests
- disable tests with n > 1
- add recording for --setup cerebras --subdirs inference --pattern
openai


## Test Plan

`./scripts/integration-tests.sh --stack-config server:ci-tests --setup
cerebras --subdirs inference --pattern openai`

```
tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming[txt=cerebras/llama-3.3-70b-inference:completion:sanity] 
instantiating llama_stack_client
Port 8321 is already in use, assuming server is already running...
llama_stack_client instantiated in 0.053s
PASSED                                                                                            [  2%]
tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming_suffix[txt=cerebras/llama-3.3-70b-inference:completion:suffix] SKIPPED (Suffix is not supported for the model: cerebras/llama-3.3-70b.)                   [  4%]
tests/integration/inference/test_openai_completion.py::test_openai_completion_streaming[txt=cerebras/llama-3.3-70b-inference:completion:sanity] PASSED                                                                                                [  6%]
tests/integration/inference/test_openai_completion.py::test_openai_completion_prompt_logprobs[txt=cerebras/llama-3.3-70b-1] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote::cerebras doesn't support vllm extra_body parameters.)             [  8%]
tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=cerebras/llama-3.3-70b] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote::cerebras doesn't support vllm extra_body parameters.)                 [ 10%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=cerebras/llama-3.3-70b-inference:chat_completion:non_streaming_01] PASSED                                                          [ 12%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_01] PASSED                                                                  [ 14%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_01] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote::cere...) [ 17%]
tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=cerebras/llama-3.3-70b-True] PASSED                                                                                                                     [ 19%]
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=cerebras/llama-3.3-70b-True] PASSED                                                                                                          [ 21%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming_with_file[txt=cerebras/llama-3.3-70b] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote::cerebras doesn't support chat completion calls wit...) [ 23%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                               [ 25%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                            [ 27%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                  [ 29%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                             [ 31%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                         [ 34%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                            [ 36%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                         [ 38%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                          [ 40%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                 [ 42%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                     [ 44%]
tests/integration/inference/test_openai_completion.py::test_openai_completion_prompt_logprobs[txt=cerebras/llama-3.3-70b-0] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote::cerebras doesn't support vllm extra_body parameters.)             [ 46%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=cerebras/llama-3.3-70b-inference:chat_completion:non_streaming_02] PASSED                                                          [ 48%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_02] PASSED                                                                  [ 51%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_02] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote::cere...) [ 53%]
tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=cerebras/llama-3.3-70b-False] PASSED                                                                                                                    [ 55%]
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=cerebras/llama-3.3-70b-False] PASSED                                                                                                         [ 57%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                          [ 59%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                       [ 61%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                             [ 63%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                        [ 65%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                    [ 68%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                       [ 70%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                    [ 72%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                     [ 74%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                            [ 76%]
tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test)                                [ 78%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=cerebras/llama-3.3-70b-inference:chat_completion:non_streaming_01] PASSED                                                     [ 80%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_01] PASSED                                                             [ 82%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_01] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote:...) [ 85%]
tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=cerebras/llama-3.3-70b-True] PASSED                                                                                                                [ 87%]
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=cerebras/llama-3.3-70b-True] PASSED                                                                                                     [ 89%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=cerebras/llama-3.3-70b-inference:chat_completion:non_streaming_02] PASSED                                                     [ 91%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_02] PASSED                                                             [ 93%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_02] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote:...) [ 95%]
tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=cerebras/llama-3.3-70b-False] PASSED                                                                                                               [ 97%]
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=cerebras/llama-3.3-70b-False] PASSED                                                                                                    [100%]

=================================================================================================================== slowest 10 durations ====================================================================================================================
0.37s call     tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=cerebras/llama-3.3-70b-inference:chat_completion:non_streaming_01]
0.34s call     tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=cerebras/llama-3.3-70b-False]
0.18s call     tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=cerebras/llama-3.3-70b-True]
0.17s setup    tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming[txt=cerebras/llama-3.3-70b-inference:completion:sanity]
0.15s call     tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=cerebras/llama-3.3-70b-True]
0.13s call     tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=cerebras/llama-3.3-70b-True]
0.12s call     tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=cerebras/llama-3.3-70b-False]
0.12s call     tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=cerebras/llama-3.3-70b-True]
0.12s call     tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=cerebras/llama-3.3-70b-False]
0.08s call     tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_02]
================================================================================================================== short test summary info ==================================================================================================================
SKIPPED [1] tests/integration/inference/test_openai_completion.py:75: Suffix is not supported for the model: cerebras/llama-3.3-70b.
SKIPPED [3] tests/integration/inference/test_openai_completion.py:123: Model cerebras/llama-3.3-70b hosted by remote::cerebras doesn't support vllm extra_body parameters.
SKIPPED [4] tests/integration/inference/test_openai_completion.py:103: Model cerebras/llama-3.3-70b hosted by remote::cerebras doesn't support n param.
SKIPPED [1] tests/integration/inference/test_openai_completion.py:129: Model cerebras/llama-3.3-70b hosted by remote::cerebras doesn't support chat completion calls with base64 encoded files.
SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:90: embedding_model_id empty - skipping test
SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:112: embedding_model_id empty - skipping test
SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:136: embedding_model_id empty - skipping test
SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:154: embedding_model_id empty - skipping test
SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:175: embedding_model_id empty - skipping test
SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:195: embedding_model_id empty - skipping test
SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:206: embedding_model_id empty - skipping test
SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:217: embedding_model_id empty - skipping test
SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:244: embedding_model_id empty - skipping test
SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:278: embedding_model_id empty - skipping test
================================================================================================= 18 passed, 29 skipped, 50 deselected, 4 warnings in 3.02s =================================================================================================
```
This commit is contained in:
Matthew Farrellee 2025-09-23 16:26:00 -04:00 committed by GitHub
parent d07ebce4d9
commit ce7a3b4dff
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 3370 additions and 13 deletions

View file

@@ -9,7 +9,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `base_url` | `<class 'str'>` | No | https://api.cerebras.ai | Base URL for the Cerebras API | | `base_url` | `<class 'str'>` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Cerebras API Key | | `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Cerebras API Key |
## Sample Configuration ## Sample Configuration

View file

@@ -5,6 +5,7 @@
# the root directory of this source tree. # the root directory of this source tree.
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
from urllib.parse import urljoin
from cerebras.cloud.sdk import AsyncCerebras from cerebras.cloud.sdk import AsyncCerebras
@@ -35,14 +36,13 @@ from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper, ModelRegistryHelper,
) )
from llama_stack.providers.utils.inference.openai_compat import ( from llama_stack.providers.utils.inference.openai_compat import (
OpenAIChatCompletionToLlamaStackMixin,
OpenAICompletionToLlamaStackMixin,
get_sampling_options, get_sampling_options,
process_chat_completion_response, process_chat_completion_response,
process_chat_completion_stream_response, process_chat_completion_stream_response,
process_completion_response, process_completion_response,
process_completion_stream_response, process_completion_stream_response,
) )
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from llama_stack.providers.utils.inference.prompt_adapter import ( from llama_stack.providers.utils.inference.prompt_adapter import (
chat_completion_request_to_prompt, chat_completion_request_to_prompt,
completion_request_to_prompt, completion_request_to_prompt,
@@ -53,10 +53,9 @@ from .models import MODEL_ENTRIES
class CerebrasInferenceAdapter( class CerebrasInferenceAdapter(
OpenAIMixin,
ModelRegistryHelper, ModelRegistryHelper,
Inference, Inference,
OpenAIChatCompletionToLlamaStackMixin,
OpenAICompletionToLlamaStackMixin,
): ):
def __init__(self, config: CerebrasImplConfig) -> None: def __init__(self, config: CerebrasImplConfig) -> None:
ModelRegistryHelper.__init__( ModelRegistryHelper.__init__(
@@ -66,11 +65,17 @@ class CerebrasInferenceAdapter(
self.config = config self.config = config
# TODO: make this use provider data, etc. like other providers # TODO: make this use provider data, etc. like other providers
self.client = AsyncCerebras( self._cerebras_client = AsyncCerebras(
base_url=self.config.base_url, base_url=self.config.base_url,
api_key=self.config.api_key.get_secret_value(), api_key=self.config.api_key.get_secret_value(),
) )
def get_api_key(self) -> str:
return self.config.api_key.get_secret_value()
def get_base_url(self) -> str:
return urljoin(self.config.base_url, "v1")
async def initialize(self) -> None: async def initialize(self) -> None:
return return
@@ -107,14 +112,14 @@ class CerebrasInferenceAdapter(
async def _nonstream_completion(self, request: CompletionRequest) -> CompletionResponse: async def _nonstream_completion(self, request: CompletionRequest) -> CompletionResponse:
params = await self._get_params(request) params = await self._get_params(request)
r = await self.client.completions.create(**params) r = await self._cerebras_client.completions.create(**params)
return process_completion_response(r) return process_completion_response(r)
async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator: async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator:
params = await self._get_params(request) params = await self._get_params(request)
stream = await self.client.completions.create(**params) stream = await self._cerebras_client.completions.create(**params)
async for chunk in process_completion_stream_response(stream): async for chunk in process_completion_stream_response(stream):
yield chunk yield chunk
@@ -156,14 +161,14 @@ class CerebrasInferenceAdapter(
async def _nonstream_chat_completion(self, request: CompletionRequest) -> CompletionResponse: async def _nonstream_chat_completion(self, request: CompletionRequest) -> CompletionResponse:
params = await self._get_params(request) params = await self._get_params(request)
r = await self.client.completions.create(**params) r = await self._cerebras_client.completions.create(**params)
return process_chat_completion_response(r, request) return process_chat_completion_response(r, request)
async def _stream_chat_completion(self, request: CompletionRequest) -> AsyncGenerator: async def _stream_chat_completion(self, request: CompletionRequest) -> AsyncGenerator:
params = await self._get_params(request) params = await self._get_params(request)
stream = await self.client.completions.create(**params) stream = await self._cerebras_client.completions.create(**params)
async for chunk in process_chat_completion_stream_response(stream, request): async for chunk in process_chat_completion_stream_response(stream, request):
yield chunk yield chunk

View file

@@ -20,8 +20,8 @@ class CerebrasImplConfig(BaseModel):
default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL), default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL),
description="Base URL for the Cerebras API", description="Base URL for the Cerebras API",
) )
api_key: SecretStr | None = Field( api_key: SecretStr = Field(
default=os.environ.get("CEREBRAS_API_KEY"), default=SecretStr(os.environ.get("CEREBRAS_API_KEY")),
description="Cerebras API Key", description="Cerebras API Key",
) )

View file

@@ -40,7 +40,6 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
"inline::sentence-transformers", "inline::sentence-transformers",
"inline::vllm", "inline::vllm",
"remote::bedrock", "remote::bedrock",
"remote::cerebras",
"remote::databricks", "remote::databricks",
# Technically Nvidia does support OpenAI completions, but none of their hosted models # Technically Nvidia does support OpenAI completions, but none of their hosted models
# support both completions and chat completions endpoint and all the Llama models are # support both completions and chat completions endpoint and all the Llama models are
@@ -98,6 +97,8 @@ def skip_if_doesnt_support_n(client_with_models, model_id):
# the entered value was 2. Update the candidateCount value and try again.', 'status': 'INVALID_ARGUMENT'} # the entered value was 2. Update the candidateCount value and try again.', 'status': 'INVALID_ARGUMENT'}
"remote::tgi", # TGI ignores n param silently "remote::tgi", # TGI ignores n param silently
"remote::together", # `n` > 1 is not supported when streaming tokens. Please disable `stream` "remote::together", # `n` > 1 is not supported when streaming tokens. Please disable `stream`
# Error code 400 - {'message': '"n" > 1 is not currently supported', 'type': 'invalid_request_error', 'param': 'n', 'code': 'wrong_api_format'}
"remote::cerebras",
"remote::databricks", # Bad request: parameter "n" must be equal to 1 for streaming mode "remote::databricks", # Bad request: parameter "n" must be equal to 1 for streaming mode
): ):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.") pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")
@@ -110,6 +111,7 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
"inline::sentence-transformers", "inline::sentence-transformers",
"inline::vllm", "inline::vllm",
"remote::bedrock", "remote::bedrock",
"remote::databricks",
"remote::cerebras", "remote::cerebras",
"remote::runpod", "remote::runpod",
"remote::watsonx", # watsonx returns 404 when hitting the /openai/v1 endpoint "remote::watsonx", # watsonx returns 404 when hitting the /openai/v1 endpoint

View file

@@ -0,0 +1,53 @@
{
"request": {
"method": "POST",
"url": "https://api.cerebras.ai/v1/v1/completions",
"headers": {},
"body": {
"model": "llama-3.3-70b",
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama-3.3-70b"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "chatcmpl-6438a448-bbbd-4da1-af88-19390676b0e9",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": " blue, sugar is white, but my heart is ________________________.\nA) black\nB) pink\nC) blank\nD) broken\nMy answer is D) broken. This is because the traditional romantic poem has a positive tone until it comes to the heart, which represents the speaker's emotional state. The word \"broken\" shows that the speaker is hurting, which adds a element of sadness to the poem. This is a typical way to express sorrow or longing in poetry.\nThe best answer is D.<|eot_id|>"
}
],
"created": 1758191351,
"model": "llama-3.3-70b",
"object": "text_completion",
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": {
"completion_tokens": 105,
"prompt_tokens": 26,
"total_tokens": 131,
"completion_tokens_details": null,
"prompt_tokens_details": {
"audio_tokens": null,
"cached_tokens": 0
}
},
"time_info": {
"queue_time": 0.00016155,
"prompt_time": 0.001595551,
"completion_time": 0.107480394,
"total_time": 0.11038637161254883,
"created": 1758191351
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,146 @@
{
"request": {
"method": "POST",
"url": "https://api.cerebras.ai/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama-3.3-70b",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
}
],
"stream": true,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the weather in a given city",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The city to get the weather for"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama-3.3-70b"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-8b6a9499-1a5f-46dc-96b7-3d2b71eecd99",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191362,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-8b6a9499-1a5f-46dc-96b7-3d2b71eecd99",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": "439c86fe5",
"function": {
"arguments": "{\"city\": \"Tokyo\"}",
"name": "get_weather"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191362,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-8b6a9499-1a5f-46dc-96b7-3d2b71eecd99",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 1758191362,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": {
"completion_tokens": 12,
"prompt_tokens": 248,
"total_tokens": 260,
"completion_tokens_details": null,
"prompt_tokens_details": {
"audio_tokens": null,
"cached_tokens": 0
}
},
"time_info": {
"queue_time": 0.00016941,
"prompt_time": 0.007276727,
"completion_time": 0.00388514,
"total_time": 0.013146162033081055,
"created": 1758191362
}
}
}
],
"is_streaming": true
}
}

View file

@@ -0,0 +1,93 @@
{
"request": {
"method": "POST",
"url": "https://api.cerebras.ai/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama-3.3-70b",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
}
],
"stream": false,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the weather in a given city",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The city to get the weather for"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama-3.3-70b"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-6228def9-c13d-4d7a-9029-e2c638a16f1b",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": null,
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "1c40cbc30",
"function": {
"arguments": "{\"city\": \"Tokyo\"}",
"name": "get_weather"
},
"type": "function"
}
]
}
}
],
"created": 1758191364,
"model": "llama-3.3-70b",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": {
"completion_tokens": 12,
"prompt_tokens": 248,
"total_tokens": 260,
"completion_tokens_details": null,
"prompt_tokens_details": {
"audio_tokens": null,
"cached_tokens": 0
}
},
"time_info": {
"queue_time": 0.00041449,
"prompt_time": 0.007237483,
"completion_time": 0.003803105,
"total_time": 0.013348102569580078,
"created": 1758191364
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,742 @@
{
"request": {
"method": "POST",
"url": "https://api.cerebras.ai/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama-3.3-70b",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "llama-3.3-70b"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": "Hello",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": "!",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " It",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": "'s",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " nice",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " meet",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " Is",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " there",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " something",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " I",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " can",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " help",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " with",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " or",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " would",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " like",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": " chat",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": "?",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758191361,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": {
"completion_tokens": 25,
"prompt_tokens": 39,
"total_tokens": 64,
"completion_tokens_details": null,
"prompt_tokens_details": {
"audio_tokens": null,
"cached_tokens": 0
}
},
"time_info": {
"queue_time": 0.00030481,
"prompt_time": 0.002094315,
"completion_time": 0.011856632,
"total_time": 0.016039371490478516,
"created": 1758191361
}
}
}
],
"is_streaming": true
}
}

View file

@ -0,0 +1,66 @@
{
"request": {
"method": "POST",
"url": "https://api.cerebras.ai/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama-3.3-70b",
"messages": [
{
"role": "user",
"content": "Which planet do humans live on?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "llama-3.3-70b"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-839aab91-21a7-4ed9-b224-d22e524eda37",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Humans live on Earth.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758191360,
"model": "llama-3.3-70b",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": {
"completion_tokens": 6,
"prompt_tokens": 42,
"total_tokens": 48,
"completion_tokens_details": null,
"prompt_tokens_details": {
"audio_tokens": null,
"cached_tokens": 0
}
},
"time_info": {
"queue_time": 0.00028033,
"prompt_time": 0.001467015,
"completion_time": 0.007069593,
"total_time": 0.010509490966796875,
"created": 1758191360
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,612 @@
{
"request": {
"method": "POST",
"url": "https://api.cerebras.ai/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama-3.3-70b",
"messages": [
{
"role": "user",
"content": "What is the name of the US captial?"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "llama-3.3-70b"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": " name",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": " US",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": " capital",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": " Washington",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": ",",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": " D",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": ".C",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": " (",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": "short",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": " for",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": " District",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": " Columbia",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": ").",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758191363,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": {
"completion_tokens": 20,
"prompt_tokens": 45,
"total_tokens": 65,
"completion_tokens_details": null,
"prompt_tokens_details": {
"audio_tokens": null,
"cached_tokens": 0
}
},
"time_info": {
"queue_time": 0.000509825,
"prompt_time": 0.002284829,
"completion_time": 0.008430168,
"total_time": 0.012710094451904297,
"created": 1758191363
}
}
}
],
"is_streaming": true
}
}

View file

@ -0,0 +1,66 @@
{
"request": {
"method": "POST",
"url": "https://api.cerebras.ai/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama-3.3-70b",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "llama-3.3-70b"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-1dcfef1f-f955-4158-a1fc-0c2643b60e4e",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! It's nice to meet you. Is there something I can help you with or would you like to chat?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758191362,
"model": "llama-3.3-70b",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": {
"completion_tokens": 25,
"prompt_tokens": 39,
"total_tokens": 64,
"completion_tokens_details": null,
"prompt_tokens_details": {
"audio_tokens": null,
"cached_tokens": 0
}
},
"time_info": {
"queue_time": 0.083508803,
"prompt_time": 0.003352167,
"completion_time": 0.011506416,
"total_time": 0.09965348243713379,
"created": 1758191362
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,66 @@
{
"request": {
"method": "POST",
"url": "https://api.cerebras.ai/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama-3.3-70b",
"messages": [
{
"role": "user",
"content": "Which planet has rings around it with a name starting with letter S?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "llama-3.3-70b"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-e9e83004-bcd0-47f8-97c3-8e3d789a6573",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "The planet with rings around it that starts with the letter S is Saturn. Saturn's rings are one of the most prominent and well-known ring systems in our solar system.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758191362,
"model": "llama-3.3-70b",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": {
"completion_tokens": 35,
"prompt_tokens": 49,
"total_tokens": 84,
"completion_tokens_details": null,
"prompt_tokens_details": {
"audio_tokens": null,
"cached_tokens": 0
}
},
"time_info": {
"queue_time": 0.00091223,
"prompt_time": 0.00239449,
"completion_time": 0.013951346,
"total_time": 0.01872849464416504,
"created": 1758191362
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,378 @@
{
"request": {
"method": "POST",
"url": "https://api.cerebras.ai/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama-3.3-70b",
"messages": [
{
"role": "user",
"content": "What's the name of the Sun in latin?"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "llama-3.3-70b"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191360,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191360,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534",
"choices": [
{
"delta": {
"content": " Latin",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191360,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534",
"choices": [
{
"delta": {
"content": " name",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191360,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534",
"choices": [
{
"delta": {
"content": " for",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191360,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191360,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534",
"choices": [
{
"delta": {
"content": " Sun",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191360,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191360,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534",
"choices": [
{
"delta": {
"content": " \"",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191360,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534",
"choices": [
{
"delta": {
"content": "Sol",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191360,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534",
"choices": [
{
"delta": {
"content": "\".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191360,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758191360,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758191360,
"model": "llama-3.3-70b",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
"usage": {
"completion_tokens": 11,
"prompt_tokens": 45,
"total_tokens": 56,
"completion_tokens_details": null,
"prompt_tokens_details": {
"audio_tokens": null,
"cached_tokens": 0
}
},
"time_info": {
"queue_time": 9.281e-05,
"prompt_time": 0.002694912,
"completion_time": 0.003747467,
"total_time": 0.008375167846679688,
"created": 1758191360
}
}
}
],
"is_streaming": true
}
}

View file

@ -0,0 +1,96 @@
{
"request": {
"method": "POST",
"url": "https://api.cerebras.ai/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama-4-maverick-17b-128e-instruct",
"created": 0,
"object": "model",
"owned_by": "Cerebras"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama-4-scout-17b-16e-instruct",
"created": 0,
"object": "model",
"owned_by": "Cerebras"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "qwen-3-235b-a22b-instruct-2507",
"created": 0,
"object": "model",
"owned_by": "Cerebras"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.1-8b",
"created": 0,
"object": "model",
"owned_by": "Cerebras"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "qwen-3-32b",
"created": 0,
"object": "model",
"owned_by": "Cerebras"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-oss-120b",
"created": 0,
"object": "model",
"owned_by": "Cerebras"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "qwen-3-235b-a22b-thinking-2507",
"created": 0,
"object": "model",
"owned_by": "Cerebras"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama-3.3-70b",
"created": 0,
"object": "model",
"owned_by": "Cerebras"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "qwen-3-coder-480b",
"created": 0,
"object": "model",
"owned_by": "Cerebras"
}
}
],
"is_streaming": false
}
}

View file

@ -108,6 +108,13 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
"embedding_model": "together/togethercomputer/m2-bert-80M-32k-retrieval", "embedding_model": "together/togethercomputer/m2-bert-80M-32k-retrieval",
}, },
), ),
"cerebras": Setup(
name="cerebras",
description="Cerebras models",
defaults={
"text_model": "cerebras/llama-3.3-70b",
},
),
"databricks": Setup( "databricks": Setup(
name="databricks", name="databricks",
description="Databricks models", description="Databricks models",