Merge b1cbfe99f9 into sapling-pr-archive-ehhuang

ehhuang 2025-09-29 15:52:57 -07:00 committed by GitHub
commit 91898e6598
81 changed files with 51742 additions and 2402 deletions

View file

@ -43,7 +43,7 @@ jobs:
# Cache oasdiff to avoid checksum failures and speed up builds
- name: Cache oasdiff
id: cache-oasdiff
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830
with:
path: ~/oasdiff
key: oasdiff-${{ runner.os }}

View file

@ -4,6 +4,8 @@ include llama_stack/models/llama/llama4/tokenizer.model
include llama_stack/core/*.sh
include llama_stack/cli/scripts/*.sh
include llama_stack/distributions/*/*.yaml
include llama_stack/providers/tests/test_cases/inference/*.json
exclude llama_stack/distributions/ci-tests
include tests/integration/test_cases/inference/*.json
include llama_stack/models/llama/*/*.md
include llama_stack/tests/integration/*.jpg
prune llama_stack/distributions/ci-tests

View file

@ -139,18 +139,7 @@ Methods:
- <code title="post /v1/agents/{agent_id}/session/{session_id}/turn">client.agents.turn.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/agents/turn.py">create</a>(session_id, \*, agent_id, \*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/agents/turn_create_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/agents/turn_create_response.py">TurnCreateResponse</a></code>
- <code title="get /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}">client.agents.turn.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/agents/turn.py">retrieve</a>(turn_id, \*, agent_id, session_id) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/agents/turn.py">Turn</a></code>
## BatchInference
Types:
```python
from llama_stack_client.types import BatchInferenceChatCompletionResponse
```
Methods:
- <code title="post /v1/batch-inference/chat-completion">client.batch_inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/batch_inference.py">chat_completion</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/batch_inference_chat_completion_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/batch_inference_chat_completion_response.py">BatchInferenceChatCompletionResponse</a></code>
- <code title="post /v1/batch-inference/completion">client.batch_inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/batch_inference.py">completion</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/batch_inference_completion_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/shared/batch_completion.py">BatchCompletion</a></code>
## Datasets

View file

@ -548,7 +548,6 @@ class Generator:
if op.defining_class.__name__ in [
"SyntheticDataGeneration",
"PostTraining",
"BatchInference",
]:
op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)"
print(op.defining_class.__name__)

View file

@ -87,94 +87,6 @@
}
}
},
"/v1/inference/batch-chat-completion": {
"post": {
"responses": {
"200": {
"description": "A BatchChatCompletionResponse with the full completions.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/BatchChatCompletionResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Inference"
],
"summary": "Generate chat completions for a batch of messages using the specified model.",
"description": "Generate chat completions for a batch of messages using the specified model.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/BatchChatCompletionRequest"
}
}
},
"required": true
}
}
},
"/v1/inference/batch-completion": {
"post": {
"responses": {
"200": {
"description": "A BatchCompletionResponse with the full completions.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/BatchCompletionResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Inference"
],
"summary": "Generate completions for a batch of content using the specified model.",
"description": "Generate completions for a batch of content using the specified model.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/BatchCompletionRequest"
}
}
},
"required": true
}
}
},
"/v1alpha/post-training/job/cancel": {
"post": {
"responses": {
@ -281,7 +193,7 @@
}
},
"tags": [
"BatchInference (Coming Soon)"
"Inference"
],
"summary": "Generate a chat completion for the given messages using the specified model.",
"description": "Generate a chat completion for the given messages using the specified model.",
@ -298,55 +210,6 @@
}
}
},
"/v1/inference/completion": {
"post": {
"responses": {
"200": {
"description": "If stream=False, returns a CompletionResponse with the full completion. If stream=True, returns an SSE event stream of CompletionResponseStreamChunk.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CompletionResponse"
}
},
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/CompletionResponseStreamChunk"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"BatchInference (Coming Soon)"
],
"summary": "Generate a completion for the given content using the specified model.",
"description": "Generate a completion for the given content using the specified model.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CompletionRequest"
}
}
},
"required": true
}
}
},
"/v1/agents": {
"get": {
"responses": {
@ -6346,6 +6209,20 @@
],
"title": "AppendRowsRequest"
},
"CancelTrainingJobRequest": {
"type": "object",
"properties": {
"job_uuid": {
"type": "string",
"description": "The UUID of the job to cancel."
}
},
"additionalProperties": false,
"required": [
"job_uuid"
],
"title": "CancelTrainingJobRequest"
},
"CompletionMessage": {
"type": "object",
"properties": {
@ -6906,6 +6783,31 @@
"type": "boolean",
"default": true
},
"items": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"title": {
"type": "string"
},
"default": {
"oneOf": [
{
@ -7051,26 +6953,23 @@
"title": "UserMessage",
"description": "A message from the user in a chat conversation."
},
"BatchChatCompletionRequest": {
"ChatCompletionRequest": {
"type": "object",
"properties": {
"model_id": {
"type": "string",
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
},
"messages_batch": {
"messages": {
"type": "array",
"items": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
"$ref": "#/components/schemas/Message"
},
"description": "The messages to generate completions for."
"description": "List of messages in the conversation."
},
"sampling_params": {
"$ref": "#/components/schemas/SamplingParams",
"description": "(Optional) Parameters to control the sampling strategy."
"description": "Parameters to control the sampling strategy."
},
"tools": {
"type": "array",
@ -7079,13 +6978,31 @@
},
"description": "(Optional) List of tool definitions available to the model."
},
"tool_config": {
"$ref": "#/components/schemas/ToolConfig",
"description": "(Optional) Configuration for tool use."
"tool_choice": {
"type": "string",
"enum": [
"auto",
"required",
"none"
],
"description": "(Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead."
},
"tool_prompt_format": {
"type": "string",
"enum": [
"json",
"function_tag",
"python_list"
],
"description": "(Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead."
},
"response_format": {
"$ref": "#/components/schemas/ResponseFormat",
"description": "(Optional) Grammar specification for guided (structured) decoding."
"description": "(Optional) Grammar specification for guided (structured) decoding. There are two options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it."
},
"stream": {
"type": "boolean",
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
},
"logprobs": {
"type": "object",
@ -7098,32 +7015,18 @@
},
"additionalProperties": false,
"description": "(Optional) If specified, log probabilities for each token position will be returned."
},
"tool_config": {
"$ref": "#/components/schemas/ToolConfig",
"description": "(Optional) Configuration for tool use."
}
},
"additionalProperties": false,
"required": [
"model_id",
"messages_batch"
"messages"
],
"title": "BatchChatCompletionRequest"
},
"BatchChatCompletionResponse": {
"type": "object",
"properties": {
"batch": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ChatCompletionResponse"
},
"description": "List of chat completion responses, one for each conversation in the batch"
}
},
"additionalProperties": false,
"required": [
"batch"
],
"title": "BatchChatCompletionResponse",
"description": "Response from a batch chat completion request."
"title": "ChatCompletionRequest"
},
"ChatCompletionResponse": {
"type": "object",
@ -7203,194 +7106,6 @@
"title": "TokenLogProbs",
"description": "Log probabilities for generated tokens."
},
"BatchCompletionRequest": {
"type": "object",
"properties": {
"model_id": {
"type": "string",
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
},
"content_batch": {
"type": "array",
"items": {
"$ref": "#/components/schemas/InterleavedContent"
},
"description": "The content to generate completions for."
},
"sampling_params": {
"$ref": "#/components/schemas/SamplingParams",
"description": "(Optional) Parameters to control the sampling strategy."
},
"response_format": {
"$ref": "#/components/schemas/ResponseFormat",
"description": "(Optional) Grammar specification for guided (structured) decoding."
},
"logprobs": {
"type": "object",
"properties": {
"top_k": {
"type": "integer",
"default": 0,
"description": "How many tokens (for each position) to return log probabilities for."
}
},
"additionalProperties": false,
"description": "(Optional) If specified, log probabilities for each token position will be returned."
}
},
"additionalProperties": false,
"required": [
"model_id",
"content_batch"
],
"title": "BatchCompletionRequest"
},
"BatchCompletionResponse": {
"type": "object",
"properties": {
"batch": {
"type": "array",
"items": {
"$ref": "#/components/schemas/CompletionResponse"
},
"description": "List of completion responses, one for each input in the batch"
}
},
"additionalProperties": false,
"required": [
"batch"
],
"title": "BatchCompletionResponse",
"description": "Response from a batch completion request."
},
"CompletionResponse": {
"type": "object",
"properties": {
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricInResponse"
},
"description": "(Optional) List of metrics associated with the API response"
},
"content": {
"type": "string",
"description": "The generated completion text"
},
"stop_reason": {
"type": "string",
"enum": [
"end_of_turn",
"end_of_message",
"out_of_tokens"
],
"description": "Reason why generation stopped"
},
"logprobs": {
"type": "array",
"items": {
"$ref": "#/components/schemas/TokenLogProbs"
},
"description": "Optional log probabilities for generated tokens"
}
},
"additionalProperties": false,
"required": [
"content",
"stop_reason"
],
"title": "CompletionResponse",
"description": "Response from a completion request."
},
"CancelTrainingJobRequest": {
"type": "object",
"properties": {
"job_uuid": {
"type": "string",
"description": "The UUID of the job to cancel."
}
},
"additionalProperties": false,
"required": [
"job_uuid"
],
"title": "CancelTrainingJobRequest"
},
"ChatCompletionRequest": {
"type": "object",
"properties": {
"model_id": {
"type": "string",
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
},
"messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
},
"description": "List of messages in the conversation."
},
"sampling_params": {
"$ref": "#/components/schemas/SamplingParams",
"description": "Parameters to control the sampling strategy."
},
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolDefinition"
},
"description": "(Optional) List of tool definitions available to the model."
},
"tool_choice": {
"type": "string",
"enum": [
"auto",
"required",
"none"
],
"description": "(Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead."
},
"tool_prompt_format": {
"type": "string",
"enum": [
"json",
"function_tag",
"python_list"
],
"description": "(Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead."
},
"response_format": {
"$ref": "#/components/schemas/ResponseFormat",
"description": "(Optional) Grammar specification for guided (structured) decoding. There are two options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it."
},
"stream": {
"type": "boolean",
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
},
"logprobs": {
"type": "object",
"properties": {
"top_k": {
"type": "integer",
"default": 0,
"description": "How many tokens (for each position) to return log probabilities for."
}
},
"additionalProperties": false,
"description": "(Optional) If specified, log probabilities for each token position will be returned."
},
"tool_config": {
"$ref": "#/components/schemas/ToolConfig",
"description": "(Optional) Configuration for tool use."
}
},
"additionalProperties": false,
"required": [
"model_id",
"messages"
],
"title": "ChatCompletionRequest"
},
"ChatCompletionResponseEvent": {
"type": "object",
"properties": {
@ -7560,87 +7275,6 @@
"title": "ToolCallDelta",
"description": "A tool call content delta for streaming responses."
},
"CompletionRequest": {
"type": "object",
"properties": {
"model_id": {
"type": "string",
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
},
"content": {
"$ref": "#/components/schemas/InterleavedContent",
"description": "The content to generate a completion for."
},
"sampling_params": {
"$ref": "#/components/schemas/SamplingParams",
"description": "(Optional) Parameters to control the sampling strategy."
},
"response_format": {
"$ref": "#/components/schemas/ResponseFormat",
"description": "(Optional) Grammar specification for guided (structured) decoding."
},
"stream": {
"type": "boolean",
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
},
"logprobs": {
"type": "object",
"properties": {
"top_k": {
"type": "integer",
"default": 0,
"description": "How many tokens (for each position) to return log probabilities for."
}
},
"additionalProperties": false,
"description": "(Optional) If specified, log probabilities for each token position will be returned."
}
},
"additionalProperties": false,
"required": [
"model_id",
"content"
],
"title": "CompletionRequest"
},
"CompletionResponseStreamChunk": {
"type": "object",
"properties": {
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricInResponse"
},
"description": "(Optional) List of metrics associated with the API response"
},
"delta": {
"type": "string",
"description": "New content generated since last chunk. This can be one or more tokens."
},
"stop_reason": {
"type": "string",
"enum": [
"end_of_turn",
"end_of_message",
"out_of_tokens"
],
"description": "Optional reason why generation stopped, if complete"
},
"logprobs": {
"type": "array",
"items": {
"$ref": "#/components/schemas/TokenLogProbs"
},
"description": "Optional log probabilities for generated tokens"
}
},
"additionalProperties": false,
"required": [
"delta"
],
"title": "CompletionResponseStreamChunk",
"description": "A chunk of a streamed completion response."
},
"AgentConfig": {
"type": "object",
"properties": {
@ -7848,6 +7482,14 @@
"default": true,
"description": "Whether this parameter is required for tool invocation"
},
"items": {
"type": "object",
"description": "Type of the elements when parameter_type is array"
},
"title": {
"type": "string",
"description": "(Optional) Title of the parameter"
},
"default": {
"oneOf": [
{
@ -18779,11 +18421,6 @@
"description": "Main functionalities provided by this API:\n- Create agents with specific instructions and ability to use tools.\n- Interactions with agents are grouped into sessions (\"threads\"), and each interaction is called a \"turn\".\n- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).\n- Agents can be provided with various shields (see the Safety API for more details).\n- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.",
"x-displayName": "Agents API for creating and interacting with agentic systems."
},
{
"name": "BatchInference (Coming Soon)",
"description": "This is an asynchronous API. If the request is successful, the response will be a job which can be polled for completion.\n\nNOTE: This API is not yet implemented and is subject to change in concert with other asynchronous APIs\nincluding (post-training, evals, etc).",
"x-displayName": "Batch inference API for generating completions and chat completions."
},
{
"name": "Benchmarks"
},
@ -18858,7 +18495,6 @@
"name": "Operations",
"tags": [
"Agents",
"BatchInference (Coming Soon)",
"Benchmarks",
"DatasetIO",
"Datasets",

View file

@ -43,72 +43,6 @@ paths:
schema:
$ref: '#/components/schemas/AppendRowsRequest'
required: true
/v1/inference/batch-chat-completion:
post:
responses:
'200':
description: >-
A BatchChatCompletionResponse with the full completions.
content:
application/json:
schema:
$ref: '#/components/schemas/BatchChatCompletionResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: >-
Generate chat completions for a batch of messages using the specified model.
description: >-
Generate chat completions for a batch of messages using the specified model.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/BatchChatCompletionRequest'
required: true
/v1/inference/batch-completion:
post:
responses:
'200':
description: >-
A BatchCompletionResponse with the full completions.
content:
application/json:
schema:
$ref: '#/components/schemas/BatchCompletionResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: >-
Generate completions for a batch of content using the specified model.
description: >-
Generate completions for a batch of content using the specified model.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/BatchCompletionRequest'
required: true
/v1alpha/post-training/job/cancel:
post:
responses:
@ -186,7 +120,7 @@ paths:
default:
$ref: '#/components/responses/DefaultError'
tags:
- BatchInference (Coming Soon)
- Inference
summary: >-
Generate a chat completion for the given messages using the specified model.
description: >-
@ -198,43 +132,6 @@ paths:
schema:
$ref: '#/components/schemas/ChatCompletionRequest'
required: true
/v1/inference/completion:
post:
responses:
'200':
description: >-
If stream=False, returns a CompletionResponse with the full completion.
If stream=True, returns an SSE event stream of CompletionResponseStreamChunk.
content:
application/json:
schema:
$ref: '#/components/schemas/CompletionResponse'
text/event-stream:
schema:
$ref: '#/components/schemas/CompletionResponseStreamChunk'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- BatchInference (Coming Soon)
summary: >-
Generate a completion for the given content using the specified model.
description: >-
Generate a completion for the given content using the specified model.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CompletionRequest'
required: true
/v1/agents:
get:
responses:
@ -4559,6 +4456,16 @@ components:
required:
- rows
title: AppendRowsRequest
CancelTrainingJobRequest:
type: object
properties:
job_uuid:
type: string
description: The UUID of the job to cancel.
additionalProperties: false
required:
- job_uuid
title: CancelTrainingJobRequest
CompletionMessage:
type: object
properties:
@ -4959,6 +4866,16 @@ components:
required:
type: boolean
default: true
items:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
title:
type: string
default:
oneOf:
- type: 'null'
@ -5076,224 +4993,6 @@ components:
title: UserMessage
description: >-
A message from the user in a chat conversation.
BatchChatCompletionRequest:
type: object
properties:
model_id:
type: string
description: >-
The identifier of the model to use. The model must be registered with
Llama Stack and available via the /models endpoint.
messages_batch:
type: array
items:
type: array
items:
$ref: '#/components/schemas/Message'
description: >-
The messages to generate completions for.
sampling_params:
$ref: '#/components/schemas/SamplingParams'
description: >-
(Optional) Parameters to control the sampling strategy.
tools:
type: array
items:
$ref: '#/components/schemas/ToolDefinition'
description: >-
(Optional) List of tool definitions available to the model.
tool_config:
$ref: '#/components/schemas/ToolConfig'
description: (Optional) Configuration for tool use.
response_format:
$ref: '#/components/schemas/ResponseFormat'
description: >-
(Optional) Grammar specification for guided (structured) decoding.
logprobs:
type: object
properties:
top_k:
type: integer
default: 0
description: >-
How many tokens (for each position) to return log probabilities for.
additionalProperties: false
description: >-
(Optional) If specified, log probabilities for each token position will
be returned.
additionalProperties: false
required:
- model_id
- messages_batch
title: BatchChatCompletionRequest
BatchChatCompletionResponse:
type: object
properties:
batch:
type: array
items:
$ref: '#/components/schemas/ChatCompletionResponse'
description: >-
List of chat completion responses, one for each conversation in the batch
additionalProperties: false
required:
- batch
title: BatchChatCompletionResponse
description: >-
Response from a batch chat completion request.
ChatCompletionResponse:
type: object
properties:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricInResponse'
description: >-
(Optional) List of metrics associated with the API response
completion_message:
$ref: '#/components/schemas/CompletionMessage'
description: The complete response message
logprobs:
type: array
items:
$ref: '#/components/schemas/TokenLogProbs'
description: >-
Optional log probabilities for generated tokens
additionalProperties: false
required:
- completion_message
title: ChatCompletionResponse
description: Response from a chat completion request.
MetricInResponse:
type: object
properties:
metric:
type: string
description: The name of the metric
value:
oneOf:
- type: integer
- type: number
description: The numeric value of the metric
unit:
type: string
description: >-
(Optional) The unit of measurement for the metric value
additionalProperties: false
required:
- metric
- value
title: MetricInResponse
description: >-
A metric value included in API responses.
TokenLogProbs:
type: object
properties:
logprobs_by_token:
type: object
additionalProperties:
type: number
description: >-
Dictionary mapping tokens to their log probabilities
additionalProperties: false
required:
- logprobs_by_token
title: TokenLogProbs
description: Log probabilities for generated tokens.
BatchCompletionRequest:
type: object
properties:
model_id:
type: string
description: >-
The identifier of the model to use. The model must be registered with
Llama Stack and available via the /models endpoint.
content_batch:
type: array
items:
$ref: '#/components/schemas/InterleavedContent'
description: The content to generate completions for.
sampling_params:
$ref: '#/components/schemas/SamplingParams'
description: >-
(Optional) Parameters to control the sampling strategy.
response_format:
$ref: '#/components/schemas/ResponseFormat'
description: >-
(Optional) Grammar specification for guided (structured) decoding.
logprobs:
type: object
properties:
top_k:
type: integer
default: 0
description: >-
How many tokens (for each position) to return log probabilities for.
additionalProperties: false
description: >-
(Optional) If specified, log probabilities for each token position will
be returned.
additionalProperties: false
required:
- model_id
- content_batch
title: BatchCompletionRequest
BatchCompletionResponse:
type: object
properties:
batch:
type: array
items:
$ref: '#/components/schemas/CompletionResponse'
description: >-
List of completion responses, one for each input in the batch
additionalProperties: false
required:
- batch
title: BatchCompletionResponse
description: >-
Response from a batch completion request.
CompletionResponse:
type: object
properties:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricInResponse'
description: >-
(Optional) List of metrics associated with the API response
content:
type: string
description: The generated completion text
stop_reason:
type: string
enum:
- end_of_turn
- end_of_message
- out_of_tokens
description: Reason why generation stopped
logprobs:
type: array
items:
$ref: '#/components/schemas/TokenLogProbs'
description: >-
Optional log probabilities for generated tokens
additionalProperties: false
required:
- content
- stop_reason
title: CompletionResponse
description: Response from a completion request.
CancelTrainingJobRequest:
type: object
properties:
job_uuid:
type: string
description: The UUID of the job to cancel.
additionalProperties: false
required:
- job_uuid
title: CancelTrainingJobRequest
ChatCompletionRequest:
type: object
properties:
@ -5372,6 +5071,65 @@ components:
- model_id
- messages
title: ChatCompletionRequest
ChatCompletionResponse:
type: object
properties:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricInResponse'
description: >-
(Optional) List of metrics associated with the API response
completion_message:
$ref: '#/components/schemas/CompletionMessage'
description: The complete response message
logprobs:
type: array
items:
$ref: '#/components/schemas/TokenLogProbs'
description: >-
Optional log probabilities for generated tokens
additionalProperties: false
required:
- completion_message
title: ChatCompletionResponse
description: Response from a chat completion request.
MetricInResponse:
type: object
properties:
metric:
type: string
description: The name of the metric
value:
oneOf:
- type: integer
- type: number
description: The numeric value of the metric
unit:
type: string
description: >-
(Optional) The unit of measurement for the metric value
additionalProperties: false
required:
- metric
- value
title: MetricInResponse
description: >-
A metric value included in API responses.
TokenLogProbs:
type: object
properties:
logprobs_by_token:
type: object
additionalProperties:
type: number
description: >-
Dictionary mapping tokens to their log probabilities
additionalProperties: false
required:
- logprobs_by_token
title: TokenLogProbs
description: Log probabilities for generated tokens.
ChatCompletionResponseEvent:
type: object
properties:
@ -5507,81 +5265,6 @@ components:
title: ToolCallDelta
description: >-
A tool call content delta for streaming responses.
CompletionRequest:
type: object
properties:
model_id:
type: string
description: >-
The identifier of the model to use. The model must be registered with
Llama Stack and available via the /models endpoint.
content:
$ref: '#/components/schemas/InterleavedContent'
description: >-
The content to generate a completion for.
sampling_params:
$ref: '#/components/schemas/SamplingParams'
description: >-
(Optional) Parameters to control the sampling strategy.
response_format:
$ref: '#/components/schemas/ResponseFormat'
description: >-
(Optional) Grammar specification for guided (structured) decoding.
stream:
type: boolean
description: >-
(Optional) If True, generate an SSE event stream of the response. Defaults
to False.
logprobs:
type: object
properties:
top_k:
type: integer
default: 0
description: >-
How many tokens (for each position) to return log probabilities for.
additionalProperties: false
description: >-
(Optional) If specified, log probabilities for each token position will
be returned.
additionalProperties: false
required:
- model_id
- content
title: CompletionRequest
CompletionResponseStreamChunk:
type: object
properties:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricInResponse'
description: >-
(Optional) List of metrics associated with the API response
delta:
type: string
description: >-
New content generated since last chunk. This can be one or more tokens.
stop_reason:
type: string
enum:
- end_of_turn
- end_of_message
- out_of_tokens
description: >-
Optional reason why generation stopped, if complete
logprobs:
type: array
items:
$ref: '#/components/schemas/TokenLogProbs'
description: >-
Optional log probabilities for generated tokens
additionalProperties: false
required:
- delta
title: CompletionResponseStreamChunk
description: >-
A chunk of a streamed completion response.
AgentConfig:
type: object
properties:
@ -5730,6 +5413,13 @@ components:
default: true
description: >-
Whether this parameter is required for tool invocation
items:
type: object
description: >-
Type of the elements when parameter_type is array
title:
type: string
description: (Optional) Title of the parameter
default:
oneOf:
- type: 'null'
@ -13983,18 +13673,6 @@ tags:
the RAG Tool and Vector IO APIs for more details.
x-displayName: >-
Agents API for creating and interacting with agentic systems.
- name: BatchInference (Coming Soon)
description: >-
This is an asynchronous API. If the request is successful, the response will
be a job which can be polled for completion.
NOTE: This API is not yet implemented and is subject to change in concert with
other asynchronous APIs
including (post-training, evals, etc).
x-displayName: >-
Batch inference API for generating completions and chat completions.
- name: Benchmarks
- name: DatasetIO
- name: Datasets
@ -14037,7 +13715,6 @@ x-tagGroups:
- name: Operations
tags:
- Agents
- BatchInference (Coming Soon)
- Benchmarks
- DatasetIO
- Datasets

View file

@ -1,79 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Protocol, runtime_checkable
from llama_stack.apis.common.job_types import Job
from llama_stack.apis.inference import (
InterleavedContent,
LogProbConfig,
Message,
ResponseFormat,
SamplingParams,
ToolChoice,
ToolDefinition,
ToolPromptFormat,
)
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import webmethod
@runtime_checkable
class BatchInference(Protocol):
"""Batch inference API for generating completions and chat completions.
This is an asynchronous API. If the request is successful, the response will be a job which can be polled for completion.
NOTE: This API is not yet implemented and is subject to change in concert with other asynchronous APIs
including (post-training, evals, etc).
"""
@webmethod(route="/batch-inference/completion", method="POST", level=LLAMA_STACK_API_V1)
async def completion(
self,
model: str,
content_batch: list[InterleavedContent],
sampling_params: SamplingParams | None = None,
response_format: ResponseFormat | None = None,
logprobs: LogProbConfig | None = None,
) -> Job:
"""Generate completions for a batch of content.
:param model: The model to use for the completion.
:param content_batch: The content to complete.
:param sampling_params: The sampling parameters to use for the completion.
:param response_format: The response format to use for the completion.
:param logprobs: The logprobs to use for the completion.
:returns: A job for the completion.
"""
...
@webmethod(route="/batch-inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
async def chat_completion(
self,
model: str,
messages_batch: list[list[Message]],
sampling_params: SamplingParams | None = None,
# zero-shot tool definitions as input to the model
tools: list[ToolDefinition] | None = None,
tool_choice: ToolChoice | None = ToolChoice.auto,
tool_prompt_format: ToolPromptFormat | None = None,
response_format: ResponseFormat | None = None,
logprobs: LogProbConfig | None = None,
) -> Job:
"""Generate chat completions for a batch of messages.
:param model: The model to use for the chat completion.
:param messages_batch: The messages to complete.
:param sampling_params: The sampling parameters to use for the completion.
:param tools: The tools to use for the chat completion.
:param tool_choice: The tool choice to use for the chat completion.
:param tool_prompt_format: The tool prompt format to use for the chat completion.
:param response_format: The response format to use for the chat completion.
:param logprobs: The logprobs to use for the chat completion.
:returns: A job for the chat completion.
"""
...

View file

@ -975,26 +975,6 @@ class EmbeddingTaskType(Enum):
document = "document"
@json_schema_type
class BatchCompletionResponse(BaseModel):
"""Response from a batch completion request.
:param batch: List of completion responses, one for each input in the batch
"""
batch: list[CompletionResponse]
@json_schema_type
class BatchChatCompletionResponse(BaseModel):
"""Response from a batch chat completion request.
:param batch: List of chat completion responses, one for each conversation in the batch
"""
batch: list[ChatCompletionResponse]
class OpenAICompletionWithInputMessages(OpenAIChatCompletion):
input_messages: list[OpenAIMessageParam]
@ -1028,7 +1008,6 @@ class InferenceProvider(Protocol):
model_store: ModelStore | None = None
@webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1)
async def completion(
self,
model_id: str,
@ -1051,27 +1030,6 @@ class InferenceProvider(Protocol):
"""
...
@webmethod(route="/inference/batch-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
async def batch_completion(
self,
model_id: str,
content_batch: list[InterleavedContent],
sampling_params: SamplingParams | None = None,
response_format: ResponseFormat | None = None,
logprobs: LogProbConfig | None = None,
) -> BatchCompletionResponse:
"""Generate completions for a batch of content using the specified model.
:param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
:param content_batch: The content to generate completions for.
:param sampling_params: (Optional) Parameters to control the sampling strategy.
:param response_format: (Optional) Grammar specification for guided (structured) decoding.
:param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
:returns: A BatchCompletionResponse with the full completions.
"""
raise NotImplementedError("Batch completion is not implemented")
return # this is so mypy's safe-super rule will consider the method concrete
@webmethod(route="/inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
async def chat_completion(
self,
@ -1112,31 +1070,6 @@ class InferenceProvider(Protocol):
"""
...
@webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
async def batch_chat_completion(
self,
model_id: str,
messages_batch: list[list[Message]],
sampling_params: SamplingParams | None = None,
tools: list[ToolDefinition] | None = None,
tool_config: ToolConfig | None = None,
response_format: ResponseFormat | None = None,
logprobs: LogProbConfig | None = None,
) -> BatchChatCompletionResponse:
"""Generate chat completions for a batch of messages using the specified model.
:param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
:param messages_batch: The messages to generate completions for.
:param sampling_params: (Optional) Parameters to control the sampling strategy.
:param tools: (Optional) List of tool definitions available to the model.
:param tool_config: (Optional) Configuration for tool use.
:param response_format: (Optional) Grammar specification for guided (structured) decoding.
:param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
:returns: A BatchChatCompletionResponse with the full completions.
"""
raise NotImplementedError("Batch chat completion is not implemented")
return # this is so mypy's safe-super rule will consider the method concrete
@webmethod(route="/inference/embeddings", method="POST", level=LLAMA_STACK_API_V1)
async def embeddings(
self,

View file

@ -27,6 +27,8 @@ class ToolParameter(BaseModel):
:param parameter_type: Type of the parameter (e.g., string, integer)
:param description: Human-readable description of what the parameter does
:param required: Whether this parameter is required for tool invocation
:param items: Type of the elements when parameter_type is array
:param title: (Optional) Title of the parameter
:param default: (Optional) Default value for the parameter if not provided
"""
@ -34,6 +36,8 @@ class ToolParameter(BaseModel):
parameter_type: str
description: str
required: bool = Field(default=True)
items: dict | None = None
title: str | None = None
default: Any | None = None
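
The new `items` and `title` fields let a tool parameter declare an element schema and a display title, which matters for array-typed arguments. A minimal sketch of constructing such a parameter, using only the field names shown in the diff above (the import path is an assumption):

```python
# Hedged sketch: an array-typed ToolParameter using the new `items` and `title` fields.
# Field names come from the diff above; the import location is assumed.
from llama_stack.apis.tools import ToolParameter  # assumed import path

tags_param = ToolParameter(
    name="tags",
    parameter_type="array",
    description="Labels to attach to the resource",
    required=False,
    items={"type": "string"},  # element schema used when parameter_type is "array"
    title="Tags",
)
```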

View file

@ -20,8 +20,6 @@ from llama_stack.apis.common.content_types import (
)
from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
from llama_stack.apis.inference import (
BatchChatCompletionResponse,
BatchCompletionResponse,
ChatCompletionResponse,
ChatCompletionResponseEventType,
ChatCompletionResponseStreamChunk,
@ -273,30 +271,6 @@ class InferenceRouter(Inference):
)
return response
async def batch_chat_completion(
self,
model_id: str,
messages_batch: list[list[Message]],
tools: list[ToolDefinition] | None = None,
tool_config: ToolConfig | None = None,
sampling_params: SamplingParams | None = None,
response_format: ResponseFormat | None = None,
logprobs: LogProbConfig | None = None,
) -> BatchChatCompletionResponse:
logger.debug(
f"InferenceRouter.batch_chat_completion: {model_id=}, {len(messages_batch)=}, {sampling_params=}, {response_format=}, {logprobs=}",
)
provider = await self.routing_table.get_provider_impl(model_id)
return await provider.batch_chat_completion(
model_id=model_id,
messages_batch=messages_batch,
tools=tools,
tool_config=tool_config,
sampling_params=sampling_params,
response_format=response_format,
logprobs=logprobs,
)
async def completion(
self,
model_id: str,
@ -338,20 +312,6 @@ class InferenceRouter(Inference):
return response
async def batch_completion(
self,
model_id: str,
content_batch: list[InterleavedContent],
sampling_params: SamplingParams | None = None,
response_format: ResponseFormat | None = None,
logprobs: LogProbConfig | None = None,
) -> BatchCompletionResponse:
logger.debug(
f"InferenceRouter.batch_completion: {model_id=}, {len(content_batch)=}, {sampling_params=}, {response_format=}, {logprobs=}",
)
provider = await self.routing_table.get_provider_impl(model_id)
return await provider.batch_completion(model_id, content_batch, sampling_params, response_format, logprobs)
async def embeddings(
self,
model_id: str,

View file

@ -14,7 +14,6 @@ from typing import Any
import yaml
from llama_stack.apis.agents import Agents
from llama_stack.apis.batch_inference import BatchInference
from llama_stack.apis.benchmarks import Benchmarks
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
@ -54,7 +53,6 @@ class LlamaStack(
Providers,
VectorDBs,
Inference,
BatchInference,
Agents,
Safety,
SyntheticDataGeneration,

View file

@ -92,6 +92,8 @@ class ToolParamDefinition(BaseModel):
param_type: str
description: str | None = None
required: bool | None = True
items: Any | None = None
title: str | None = None
default: Any | None = None

View file

@ -798,6 +798,8 @@ class ChatAgent(ShieldRunnerMixin):
param_type=param.parameter_type,
description=param.description,
required=param.required,
items=param.items,
title=param.title,
default=param.default,
)
for param in tool_def.parameters
@ -841,6 +843,8 @@ class ChatAgent(ShieldRunnerMixin):
param_type=param.parameter_type,
description=param.description,
required=param.required,
items=param.items,
title=param.title,
default=param.default,
)
for param in tool_def.parameters
@ -920,7 +924,7 @@ async def get_raw_document_text(document: Document) -> str:
DeprecationWarning,
stacklevel=2,
)
elif not (document.mime_type.startswith("text/") or document.mime_type == "application/yaml"):
elif not (document.mime_type.startswith("text/") or document.mime_type in ("application/yaml", "application/json")):
raise ValueError(f"Unexpected document mime type: {document.mime_type}")
if isinstance(document.content, URL):

View file

@ -568,6 +568,7 @@ class StreamingResponseOrchestrator:
description=param.description,
required=param.required,
default=param.default,
items=param.items,
)
for param in t.parameters
},

View file

@ -18,8 +18,6 @@ from llama_stack.apis.common.content_types import (
ToolCallParseStatus,
)
from llama_stack.apis.inference import (
BatchChatCompletionResponse,
BatchCompletionResponse,
ChatCompletionRequest,
ChatCompletionResponse,
ChatCompletionResponseEvent,
@ -219,41 +217,6 @@ class MetaReferenceInferenceImpl(
results = await self._nonstream_completion([request])
return results[0]
async def batch_completion(
self,
model_id: str,
content_batch: list[InterleavedContent],
sampling_params: SamplingParams | None = None,
response_format: ResponseFormat | None = None,
stream: bool | None = False,
logprobs: LogProbConfig | None = None,
) -> BatchCompletionResponse:
if sampling_params is None:
sampling_params = SamplingParams()
if logprobs:
assert logprobs.top_k == 1, f"Unexpected top_k={logprobs.top_k}"
content_batch = [
augment_content_with_response_format_prompt(response_format, content) for content in content_batch
]
request_batch = []
for content in content_batch:
request = CompletionRequest(
model=model_id,
content=content,
sampling_params=sampling_params,
response_format=response_format,
stream=stream,
logprobs=logprobs,
)
self.check_model(request)
request = await convert_request_to_raw(request)
request_batch.append(request)
results = await self._nonstream_completion(request_batch)
return BatchCompletionResponse(batch=results)
async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator:
tokenizer = self.generator.formatter.tokenizer
@ -399,49 +362,6 @@ class MetaReferenceInferenceImpl(
results = await self._nonstream_chat_completion([request])
return results[0]
async def batch_chat_completion(
self,
model_id: str,
messages_batch: list[list[Message]],
sampling_params: SamplingParams | None = None,
response_format: ResponseFormat | None = None,
tools: list[ToolDefinition] | None = None,
stream: bool | None = False,
logprobs: LogProbConfig | None = None,
tool_config: ToolConfig | None = None,
) -> BatchChatCompletionResponse:
if sampling_params is None:
sampling_params = SamplingParams()
if logprobs:
assert logprobs.top_k == 1, f"Unexpected top_k={logprobs.top_k}"
# wrapper request to make it easier to pass around (internal only, not exposed to API)
request_batch = []
for messages in messages_batch:
request = ChatCompletionRequest(
model=model_id,
messages=messages,
sampling_params=sampling_params,
tools=tools or [],
response_format=response_format,
logprobs=logprobs,
tool_config=tool_config or ToolConfig(),
)
self.check_model(request)
# augment and rewrite messages depending on the model
request.messages = chat_completion_request_to_messages(request, self.llama_model.core_model_id.value)
# download media and convert to raw content so we can send it to the model
request = await convert_request_to_raw(request)
request_batch.append(request)
if self.config.create_distributed_process_group:
if SEMAPHORE.locked():
raise RuntimeError("Only one concurrent request is supported")
results = await self._nonstream_chat_completion(request_batch)
return BatchChatCompletionResponse(batch=results)
async def _nonstream_chat_completion(
self, request_batch: list[ChatCompletionRequest]
) -> list[ChatCompletionResponse]:

View file

@ -61,6 +61,7 @@ logger = get_logger(name=__name__, category="inference::fireworks")
class FireworksInferenceAdapter(OpenAIMixin, ModelRegistryHelper, Inference, NeedsRequestProviderData):
embedding_model_metadata = {
"nomic-ai/nomic-embed-text-v1.5": {"embedding_dimension": 768, "context_length": 8192},
"accounts/fireworks/models/qwen3-embedding-8b": {"embedding_dimension": 4096, "context_length": 40960},
}
def __init__(self, config: FireworksImplConfig) -> None:

View file

@ -6,8 +6,7 @@
import asyncio
import base64
from collections.abc import AsyncGenerator, AsyncIterator
from collections.abc import AsyncGenerator
from typing import Any
from ollama import AsyncClient as AsyncOllamaClient
@ -33,10 +32,6 @@ from llama_stack.apis.inference import (
JsonSchemaResponseFormat,
LogProbConfig,
Message,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAIMessageParam,
OpenAIResponseFormatParam,
ResponseFormat,
SamplingParams,
TextTruncation,
@ -62,7 +57,6 @@ from llama_stack.providers.utils.inference.openai_compat import (
OpenAICompatCompletionChoice,
OpenAICompatCompletionResponse,
get_sampling_options,
prepare_openai_completion_params,
process_chat_completion_response,
process_chat_completion_stream_response,
process_completion_response,
@ -75,7 +69,6 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
content_has_media,
convert_image_content_to_url,
interleaved_content_as_str,
localize_image_content,
request_has_media,
)
@ -84,6 +77,7 @@ logger = get_logger(name=__name__, category="inference::ollama")
class OllamaInferenceAdapter(
OpenAIMixin,
ModelRegistryHelper,
InferenceProvider,
ModelsProtocolPrivate,
):
@ -129,6 +123,8 @@ class OllamaInferenceAdapter(
],
)
self.config = config
# Ollama does not support image urls, so we need to download the image and convert it to base64
self.download_images = True
self._clients: dict[asyncio.AbstractEventLoop, AsyncOllamaClient] = {}
@property
@ -173,9 +169,6 @@ class OllamaInferenceAdapter(
async def shutdown(self) -> None:
self._clients.clear()
async def unregister_model(self, model_id: str) -> None:
pass
async def _get_model(self, model_id: str) -> Model:
if not self.model_store:
raise ValueError("Model store not set")
@ -403,75 +396,6 @@ class OllamaInferenceAdapter(
raise UnsupportedModelError(model.provider_model_id, list(self._model_cache.keys()))
async def openai_chat_completion(
self,
model: str,
messages: list[OpenAIMessageParam],
frequency_penalty: float | None = None,
function_call: str | dict[str, Any] | None = None,
functions: list[dict[str, Any]] | None = None,
logit_bias: dict[str, float] | None = None,
logprobs: bool | None = None,
max_completion_tokens: int | None = None,
max_tokens: int | None = None,
n: int | None = None,
parallel_tool_calls: bool | None = None,
presence_penalty: float | None = None,
response_format: OpenAIResponseFormatParam | None = None,
seed: int | None = None,
stop: str | list[str] | None = None,
stream: bool | None = None,
stream_options: dict[str, Any] | None = None,
temperature: float | None = None,
tool_choice: str | dict[str, Any] | None = None,
tools: list[dict[str, Any]] | None = None,
top_logprobs: int | None = None,
top_p: float | None = None,
user: str | None = None,
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
model_obj = await self._get_model(model)
# Ollama does not support image urls, so we need to download the image and convert it to base64
async def _convert_message(m: OpenAIMessageParam) -> OpenAIMessageParam:
if isinstance(m.content, list):
for c in m.content:
if c.type == "image_url" and c.image_url and c.image_url.url:
localize_result = await localize_image_content(c.image_url.url)
if localize_result is None:
raise ValueError(f"Failed to localize image content from {c.image_url.url}")
content, format = localize_result
c.image_url.url = f"data:image/{format};base64,{base64.b64encode(content).decode('utf-8')}"
return m
messages = [await _convert_message(m) for m in messages]
params = await prepare_openai_completion_params(
model=model_obj.provider_resource_id,
messages=messages,
frequency_penalty=frequency_penalty,
function_call=function_call,
functions=functions,
logit_bias=logit_bias,
logprobs=logprobs,
max_completion_tokens=max_completion_tokens,
max_tokens=max_tokens,
n=n,
parallel_tool_calls=parallel_tool_calls,
presence_penalty=presence_penalty,
response_format=response_format,
seed=seed,
stop=stop,
stream=stream,
stream_options=stream_options,
temperature=temperature,
tool_choice=tool_choice,
tools=tools,
top_logprobs=top_logprobs,
top_p=top_p,
user=user,
)
return await OpenAIMixin.openai_chat_completion(self, **params)
async def convert_message_to_openai_dict_for_ollama(message: Message) -> list[dict]:
async def _convert_content(content) -> dict:

View file

@ -21,8 +21,6 @@ logger = get_logger(name=__name__, category="inference::openai")
# | completion | LiteLLMOpenAIMixin |
# | chat_completion | LiteLLMOpenAIMixin |
# | embedding | LiteLLMOpenAIMixin |
# | batch_completion | LiteLLMOpenAIMixin |
# | batch_chat_completion | LiteLLMOpenAIMixin |
# | openai_completion | OpenAIMixin |
# | openai_chat_completion | OpenAIMixin |
# | openai_embeddings | OpenAIMixin |

View file

@ -805,6 +805,10 @@ def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict:
properties[param_name].update(description=param.description)
if param.default:
properties[param_name].update(default=param.default)
if param.items:
properties[param_name].update(items=param.items)
if param.title:
properties[param_name].update(title=param.title)
if param.required:
required.append(param_name)
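
With `items` and `title` now forwarded by `convert_tooldef_to_openai_tool`, an array parameter round-trips into the OpenAI tool schema. A hedged sketch of the resulting `properties` entry for the `tags` parameter from the earlier example, assuming the property dict already carries the parameter's JSON-schema type:

```python
# Expected shape of one `properties` entry after convert_tooldef_to_openai_tool,
# given ToolParameter(name="tags", parameter_type="array", items={"type": "string"},
# title="Tags", description="Labels to attach to the resource"). Illustrative only.
expected_tags_property = {
    "type": "array",
    "description": "Labels to attach to the resource",
    "items": {"type": "string"},
    "title": "Tags",
}
```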

View file

@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import base64
import uuid
from abc import ABC, abstractmethod
from collections.abc import AsyncIterator
@ -26,6 +27,7 @@ from llama_stack.apis.models import ModelType
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content
logger = get_logger(name=__name__, category="providers::utils")
@ -51,6 +53,10 @@ class OpenAIMixin(ModelRegistryHelper, ABC):
# This is useful for providers that do not return a unique id in the response.
overwrite_completion_id: bool = False
# Allow subclasses to control whether to download images and convert to base64
# for providers that require base64 encoded images instead of URLs.
download_images: bool = False
# Embedding model metadata for this provider
# Can be set by subclasses or instances to provide embedding models
# Format: {"model_id": {"embedding_dimension": 1536, "context_length": 8192}}
@ -239,6 +245,24 @@ class OpenAIMixin(ModelRegistryHelper, ABC):
"""
Direct OpenAI chat completion API call.
"""
if self.download_images:
async def _localize_image_url(m: OpenAIMessageParam) -> OpenAIMessageParam:
if isinstance(m.content, list):
for c in m.content:
if c.type == "image_url" and c.image_url and c.image_url.url and "http" in c.image_url.url:
localize_result = await localize_image_content(c.image_url.url)
if localize_result is None:
raise ValueError(
f"Failed to localize image content from {c.image_url.url[:42]}{'...' if len(c.image_url.url) > 42 else ''}"
)
content, format = localize_result
c.image_url.url = f"data:image/{format};base64,{base64.b64encode(content).decode('utf-8')}"
# else it's a string and we don't need to modify it
return m
messages = [await _localize_image_url(m) for m in messages]
resp = await self.client.chat.completions.create(
**await prepare_openai_completion_params(
model=await self._get_provider_model_id(model),
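
The new `download_images` class attribute lets adapters whose backends cannot fetch remote image URLs (the Ollama adapter sets it in its `__init__` above) opt into this localization step: http(s) `image_url` content is downloaded and rewritten as a base64 data URL before the request reaches the OpenAI-compatible endpoint. A hedged sketch of how a subclass opts in; the adapter name and module path are illustrative assumptions:

```python
# Illustrative only: a provider adapter opting into base64 image localization.
# OpenAIMixin and the flag come from the diff above; "ExampleAdapter" is hypothetical.
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin  # assumed module path

class ExampleAdapter(OpenAIMixin):
    # When True, openai_chat_completion() downloads any http(s) image_url content
    # and rewrites it as a data:image/<fmt>;base64,... URL before calling the backend.
    download_images = True
```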

View file

@ -192,6 +192,14 @@ async def localize_image_content(uri: str) -> tuple[bytes, str] | None:
format = "png"
return content, format
elif uri.startswith("data"):
# data:image/{format};base64,{data}
match = re.match(r"data:image/(\w+);base64,(.+)", uri)
if not match:
raise ValueError(f"Invalid data URL format, {uri[:40]}...")
fmt, image_data = match.groups()
content = base64.b64decode(image_data)
return content, fmt
else:
return None
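
`localize_image_content` now also handles `data:` URLs, returning the decoded bytes and the image format. A small sketch of the parsing behavior the new branch implements (standalone re-implementation for illustration):

```python
import base64
import re

# Illustrative re-implementation of the new data-URL branch in localize_image_content.
def parse_image_data_url(uri: str) -> tuple[bytes, str]:
    match = re.match(r"data:image/(\w+);base64,(.+)", uri)
    if not match:
        raise ValueError(f"Invalid data URL format, {uri[:40]}...")
    fmt, image_data = match.groups()
    return base64.b64decode(image_data), fmt

content, fmt = parse_image_data_url("data:image/png;base64," + base64.b64encode(b"\x89PNG").decode())
assert fmt == "png" and content.startswith(b"\x89PNG")
```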

View file

@ -120,6 +120,10 @@ async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefs
name=param_name,
parameter_type=param_schema.get("type", "string"),
description=param_schema.get("description", ""),
required="default" not in param_schema,
items=param_schema.get("items", None),
title=param_schema.get("title", None),
default=param_schema.get("default", None),
)
)
tools.append(
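
The MCP tool listing now carries `required`, `items`, `title`, and `default` through from each input-schema property. A hedged sketch of the mapping for a single property, following the construction in the diff above (the `ToolParameter` import path is an assumption):

```python
# Illustrative mapping from one MCP input-schema property to a ToolParameter.
from llama_stack.apis.tools import ToolParameter  # assumed import path

param_name = "limit"
param_schema = {
    "type": "integer",
    "description": "Maximum number of results",
    "title": "Limit",
    "default": 10,
}

tool_param = ToolParameter(
    name=param_name,
    parameter_type=param_schema.get("type", "string"),
    description=param_schema.get("description", ""),
    required="default" not in param_schema,  # False here, since a default is present
    items=param_schema.get("items", None),
    title=param_schema.get("title", None),
    default=param_schema.get("default", None),
)
assert tool_param.required is False
```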

View file

@ -28,7 +28,7 @@
"react-markdown": "^10.1.0",
"remark-gfm": "^4.0.1",
"remeda": "^2.32.0",
"shiki": "^1.29.2",
"shiki": "^3.13.0",
"sonner": "^2.0.7",
"tailwind-merge": "^3.3.1"
},
@ -51,7 +51,7 @@
"prettier": "3.6.2",
"tailwindcss": "^4",
"ts-node": "^10.9.2",
"tw-animate-css": "^1.2.9",
"tw-animate-css": "^1.4.0",
"typescript": "^5"
}
},
@ -3250,65 +3250,63 @@
"license": "MIT"
},
"node_modules/@shikijs/core": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/@shikijs/core/-/core-1.29.2.tgz",
"integrity": "sha512-vju0lY9r27jJfOY4Z7+Rt/nIOjzJpZ3y+nYpqtUZInVoXQ/TJZcfGnNOGnKjFdVZb8qexiCuSlZRKcGfhhTTZQ==",
"version": "3.13.0",
"resolved": "https://registry.npmjs.org/@shikijs/core/-/core-3.13.0.tgz",
"integrity": "sha512-3P8rGsg2Eh2qIHekwuQjzWhKI4jV97PhvYjYUzGqjvJfqdQPz+nMlfWahU24GZAyW1FxFI1sYjyhfh5CoLmIUA==",
"license": "MIT",
"dependencies": {
"@shikijs/engine-javascript": "1.29.2",
"@shikijs/engine-oniguruma": "1.29.2",
"@shikijs/types": "1.29.2",
"@shikijs/vscode-textmate": "^10.0.1",
"@shikijs/types": "3.13.0",
"@shikijs/vscode-textmate": "^10.0.2",
"@types/hast": "^3.0.4",
"hast-util-to-html": "^9.0.4"
"hast-util-to-html": "^9.0.5"
}
},
"node_modules/@shikijs/engine-javascript": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/@shikijs/engine-javascript/-/engine-javascript-1.29.2.tgz",
"integrity": "sha512-iNEZv4IrLYPv64Q6k7EPpOCE/nuvGiKl7zxdq0WFuRPF5PAE9PRo2JGq/d8crLusM59BRemJ4eOqrFrC4wiQ+A==",
"version": "3.13.0",
"resolved": "https://registry.npmjs.org/@shikijs/engine-javascript/-/engine-javascript-3.13.0.tgz",
"integrity": "sha512-Ty7xv32XCp8u0eQt8rItpMs6rU9Ki6LJ1dQOW3V/56PKDcpvfHPnYFbsx5FFUP2Yim34m/UkazidamMNVR4vKg==",
"license": "MIT",
"dependencies": {
"@shikijs/types": "1.29.2",
"@shikijs/vscode-textmate": "^10.0.1",
"oniguruma-to-es": "^2.2.0"
"@shikijs/types": "3.13.0",
"@shikijs/vscode-textmate": "^10.0.2",
"oniguruma-to-es": "^4.3.3"
}
},
"node_modules/@shikijs/engine-oniguruma": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/@shikijs/engine-oniguruma/-/engine-oniguruma-1.29.2.tgz",
"integrity": "sha512-7iiOx3SG8+g1MnlzZVDYiaeHe7Ez2Kf2HrJzdmGwkRisT7r4rak0e655AcM/tF9JG/kg5fMNYlLLKglbN7gBqA==",
"version": "3.13.0",
"resolved": "https://registry.npmjs.org/@shikijs/engine-oniguruma/-/engine-oniguruma-3.13.0.tgz",
"integrity": "sha512-O42rBGr4UDSlhT2ZFMxqM7QzIU+IcpoTMzb3W7AlziI1ZF7R8eS2M0yt5Ry35nnnTX/LTLXFPUjRFCIW+Operg==",
"license": "MIT",
"dependencies": {
"@shikijs/types": "1.29.2",
"@shikijs/vscode-textmate": "^10.0.1"
"@shikijs/types": "3.13.0",
"@shikijs/vscode-textmate": "^10.0.2"
}
},
"node_modules/@shikijs/langs": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/@shikijs/langs/-/langs-1.29.2.tgz",
"integrity": "sha512-FIBA7N3LZ+223U7cJDUYd5shmciFQlYkFXlkKVaHsCPgfVLiO+e12FmQE6Tf9vuyEsFe3dIl8qGWKXgEHL9wmQ==",
"version": "3.13.0",
"resolved": "https://registry.npmjs.org/@shikijs/langs/-/langs-3.13.0.tgz",
"integrity": "sha512-672c3WAETDYHwrRP0yLy3W1QYB89Hbpj+pO4KhxK6FzIrDI2FoEXNiNCut6BQmEApYLfuYfpgOZaqbY+E9b8wQ==",
"license": "MIT",
"dependencies": {
"@shikijs/types": "1.29.2"
"@shikijs/types": "3.13.0"
}
},
"node_modules/@shikijs/themes": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/@shikijs/themes/-/themes-1.29.2.tgz",
"integrity": "sha512-i9TNZlsq4uoyqSbluIcZkmPL9Bfi3djVxRnofUHwvx/h6SRW3cwgBC5SML7vsDcWyukY0eCzVN980rqP6qNl9g==",
"version": "3.13.0",
"resolved": "https://registry.npmjs.org/@shikijs/themes/-/themes-3.13.0.tgz",
"integrity": "sha512-Vxw1Nm1/Od8jyA7QuAenaV78BG2nSr3/gCGdBkLpfLscddCkzkL36Q5b67SrLLfvAJTOUzW39x4FHVCFriPVgg==",
"license": "MIT",
"dependencies": {
"@shikijs/types": "1.29.2"
"@shikijs/types": "3.13.0"
}
},
"node_modules/@shikijs/types": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/@shikijs/types/-/types-1.29.2.tgz",
"integrity": "sha512-VJjK0eIijTZf0QSTODEXCqinjBn0joAHQ+aPSBzrv4O2d/QSbsMw+ZeSRx03kV34Hy7NzUvV/7NqfYGRLrASmw==",
"version": "3.13.0",
"resolved": "https://registry.npmjs.org/@shikijs/types/-/types-3.13.0.tgz",
"integrity": "sha512-oM9P+NCFri/mmQ8LoFGVfVyemm5Hi27330zuOBp0annwJdKH1kOLndw3zCtAVDehPLg9fKqoEx3Ht/wNZxolfw==",
"license": "MIT",
"dependencies": {
"@shikijs/vscode-textmate": "^10.0.1",
"@shikijs/vscode-textmate": "^10.0.2",
"@types/hast": "^3.0.4"
}
},
@ -6084,12 +6082,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/emoji-regex-xs": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/emoji-regex-xs/-/emoji-regex-xs-1.0.0.tgz",
"integrity": "sha512-LRlerrMYoIDrT6jgpeZ2YYl/L8EulRTt5hQcYjy5AInh7HWXKimpqx68aknBFpGL2+/IcogTcaydJEgaTmOpDg==",
"license": "MIT"
},
"node_modules/encodeurl": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz",
@ -11813,15 +11805,21 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/oniguruma-parser": {
"version": "0.12.1",
"resolved": "https://registry.npmjs.org/oniguruma-parser/-/oniguruma-parser-0.12.1.tgz",
"integrity": "sha512-8Unqkvk1RYc6yq2WBYRj4hdnsAxVze8i7iPfQr8e4uSP3tRv0rpZcbGUDvxfQQcdwHt/e9PrMvGCsa8OqG9X3w==",
"license": "MIT"
},
"node_modules/oniguruma-to-es": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/oniguruma-to-es/-/oniguruma-to-es-2.3.0.tgz",
"integrity": "sha512-bwALDxriqfKGfUufKGGepCzu9x7nJQuoRoAFp4AnwehhC2crqrDIAP/uN2qdlsAvSMpeRC3+Yzhqc7hLmle5+g==",
"version": "4.3.3",
"resolved": "https://registry.npmjs.org/oniguruma-to-es/-/oniguruma-to-es-4.3.3.tgz",
"integrity": "sha512-rPiZhzC3wXwE59YQMRDodUwwT9FZ9nNBwQQfsd1wfdtlKEyCdRV0avrTcSZ5xlIvGRVPd/cx6ZN45ECmS39xvg==",
"license": "MIT",
"dependencies": {
"emoji-regex-xs": "^1.0.0",
"regex": "^5.1.1",
"regex-recursion": "^5.1.1"
"oniguruma-parser": "^0.12.1",
"regex": "^6.0.1",
"regex-recursion": "^6.0.2"
}
},
"node_modules/openid-client": {
@ -12613,21 +12611,20 @@
}
},
"node_modules/regex": {
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/regex/-/regex-5.1.1.tgz",
"integrity": "sha512-dN5I359AVGPnwzJm2jN1k0W9LPZ+ePvoOeVMMfqIMFz53sSwXkxaJoxr50ptnsC771lK95BnTrVSZxq0b9yCGw==",
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/regex/-/regex-6.0.1.tgz",
"integrity": "sha512-uorlqlzAKjKQZ5P+kTJr3eeJGSVroLKoHmquUj4zHWuR+hEyNqlXsSKlYYF5F4NI6nl7tWCs0apKJ0lmfsXAPA==",
"license": "MIT",
"dependencies": {
"regex-utilities": "^2.3.0"
}
},
"node_modules/regex-recursion": {
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/regex-recursion/-/regex-recursion-5.1.1.tgz",
"integrity": "sha512-ae7SBCbzVNrIjgSbh7wMznPcQel1DNlDtzensnFxpiNpXt1U2ju/bHugH422r+4LAVS1FpW1YCwilmnNsjum9w==",
"version": "6.0.2",
"resolved": "https://registry.npmjs.org/regex-recursion/-/regex-recursion-6.0.2.tgz",
"integrity": "sha512-0YCaSCq2VRIebiaUviZNs0cBz1kg5kVS2UKUfNIx8YVs1cN3AV7NTctO5FOKBA+UT2BPJIWZauYHPqJODG50cg==",
"license": "MIT",
"dependencies": {
"regex": "^5.1.1",
"regex-utilities": "^2.3.0"
}
},
@ -13165,18 +13162,18 @@
}
},
"node_modules/shiki": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/shiki/-/shiki-1.29.2.tgz",
"integrity": "sha512-njXuliz/cP+67jU2hukkxCNuH1yUi4QfdZZY+sMr5PPrIyXSu5iTb/qYC4BiWWB0vZ+7TbdvYUCeL23zpwCfbg==",
"version": "3.13.0",
"resolved": "https://registry.npmjs.org/shiki/-/shiki-3.13.0.tgz",
"integrity": "sha512-aZW4l8Og16CokuCLf8CF8kq+KK2yOygapU5m3+hoGw0Mdosc6fPitjM+ujYarppj5ZIKGyPDPP1vqmQhr+5/0g==",
"license": "MIT",
"dependencies": {
"@shikijs/core": "1.29.2",
"@shikijs/engine-javascript": "1.29.2",
"@shikijs/engine-oniguruma": "1.29.2",
"@shikijs/langs": "1.29.2",
"@shikijs/themes": "1.29.2",
"@shikijs/types": "1.29.2",
"@shikijs/vscode-textmate": "^10.0.1",
"@shikijs/core": "3.13.0",
"@shikijs/engine-javascript": "3.13.0",
"@shikijs/engine-oniguruma": "3.13.0",
"@shikijs/langs": "3.13.0",
"@shikijs/themes": "3.13.0",
"@shikijs/types": "3.13.0",
"@shikijs/vscode-textmate": "^10.0.2",
"@types/hast": "^3.0.4"
}
},
@ -13970,9 +13967,9 @@
"license": "0BSD"
},
"node_modules/tw-animate-css": {
"version": "1.2.9",
"resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.2.9.tgz",
"integrity": "sha512-9O4k1at9pMQff9EAcCEuy1UNO43JmaPQvq+0lwza9Y0BQ6LB38NiMj+qHqjoQf40355MX+gs6wtlR6H9WsSXFg==",
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.4.0.tgz",
"integrity": "sha512-7bziOlRqH0hJx80h/3mbicLW7o8qLsH5+RaLR2t+OHM3D0JlWGODQKQ4cxbK7WlvmUxpcj6Kgu6EKqjrGFe3QQ==",
"dev": true,
"license": "MIT",
"funding": {

View file

@ -33,7 +33,7 @@
"react-markdown": "^10.1.0",
"remark-gfm": "^4.0.1",
"remeda": "^2.32.0",
"shiki": "^1.29.2",
"shiki": "^3.13.0",
"sonner": "^2.0.7",
"tailwind-merge": "^3.3.1"
},
@ -56,7 +56,7 @@
"prettier": "3.6.2",
"tailwindcss": "^4",
"ts-node": "^10.9.2",
"tw-animate-css": "^1.2.9",
"tw-animate-css": "^1.4.0",
"typescript": "^5"
}
}

View file

@ -167,6 +167,8 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
from starlette.responses import Response
from starlette.routing import Mount, Route
from llama_stack.log import get_logger
server = FastMCP("FastMCP Test Server", log_level="WARNING")
tools = tools or default_tools()
@ -211,6 +213,7 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
return sock.getsockname()[1]
port = get_open_port()
logger = get_logger(__name__, category="tests::mcp")
# make uvicorn logs less verbose
config = uvicorn.Config(app, host="0.0.0.0", port=port, log_level="warning")
@ -218,10 +221,17 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
app.state.uvicorn_server = server_instance
def run_server():
server_instance.run()
try:
logger.info(f"Starting MCP server on port {port}")
server_instance.run()
logger.info(f"MCP server on port {port} has stopped")
except Exception as e:
logger.error(f"MCP server failed to start on port {port}: {e}")
raise
# Start the server in a new thread
server_thread = threading.Thread(target=run_server, daemon=True)
logger.info(f"Starting MCP server thread on port {port}")
server_thread.start()
# Polling until the server is ready
@ -229,24 +239,36 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
start_time = time.time()
server_url = f"http://localhost:{port}/sse"
logger.info(f"Waiting for MCP server to be ready at {server_url}")
while time.time() - start_time < timeout:
try:
response = httpx.get(server_url)
if response.status_code in [200, 401]:
logger.info(f"MCP server is ready on port {port} (status: {response.status_code})")
break
except httpx.RequestError:
except httpx.RequestError as e:
logger.debug(f"Server not ready yet, retrying... ({e})")
pass
time.sleep(0.1)
else:
# If we exit the loop due to timeout
logger.error(f"MCP server failed to start within {timeout} seconds on port {port}")
logger.error(f"Thread alive: {server_thread.is_alive()}")
if server_thread.is_alive():
logger.error("Server thread is still running but not responding to HTTP requests")
try:
yield {"server_url": server_url}
finally:
logger.info(f"Shutting down MCP server on port {port}")
server_instance.should_exit = True
time.sleep(0.5)
# Force shutdown if still running
if server_thread.is_alive():
try:
logger.info("Force shutting down server thread")
if hasattr(server_instance, "servers") and server_instance.servers:
for srv in server_instance.servers:
srv.close()
@ -254,9 +276,9 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
# Wait for graceful shutdown
server_thread.join(timeout=3)
if server_thread.is_alive():
print("Warning: Server thread still alive after shutdown attempt")
logger.warning("Server thread still alive after shutdown attempt")
except Exception as e:
print(f"Error during server shutdown: {e}")
logger.error(f"Error during server shutdown: {e}")
# CRITICAL: Reset SSE global state to prevent event loop contamination
# Reset the SSE AppStatus singleton that stores anyio.Event objects
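The readiness check added to this fixture is a generic poll-until-ready loop: hit the SSE endpoint until it answers with 200 (or 401 when an auth token is required), and give up with a clear error once the timeout elapses. A condensed sketch of that loop, assuming only httpx and the standard library:

import time

import httpx


def wait_for_server(url: str, timeout: float = 30.0, ok_statuses: tuple[int, ...] = (200, 401)) -> bool:
    """Poll `url` until it responds with an acceptable status or the timeout elapses."""
    start = time.time()
    while time.time() - start < timeout:
        try:
            if httpx.get(url).status_code in ok_statuses:
                return True
        except httpx.RequestError:
            pass  # server not accepting connections yet
        time.sleep(0.1)
    return False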

View file

@ -1,76 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import pytest
from ..test_cases.test_case import TestCase
def skip_if_provider_doesnt_support_batch_inference(client_with_models, model_id):
models = {m.identifier: m for m in client_with_models.models.list()}
models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
provider_id = models[model_id].provider_id
providers = {p.provider_id: p for p in client_with_models.providers.list()}
provider = providers[provider_id]
if provider.provider_type not in ("inline::meta-reference",):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support batch inference")
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:batch_completion",
],
)
def test_batch_completion_non_streaming(client_with_models, text_model_id, test_case):
skip_if_provider_doesnt_support_batch_inference(client_with_models, text_model_id)
tc = TestCase(test_case)
content_batch = tc["contents"]
response = client_with_models.inference.batch_completion(
content_batch=content_batch,
model_id=text_model_id,
sampling_params={
"max_tokens": 50,
},
)
assert len(response.batch) == len(content_batch)
for i, r in enumerate(response.batch):
print(f"response {i}: {r.content}")
assert len(r.content) > 10
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:batch_completion",
],
)
def test_batch_chat_completion_non_streaming(client_with_models, text_model_id, test_case):
skip_if_provider_doesnt_support_batch_inference(client_with_models, text_model_id)
tc = TestCase(test_case)
qa_pairs = tc["qa_pairs"]
message_batch = [
[
{
"role": "user",
"content": qa["question"],
}
]
for qa in qa_pairs
]
response = client_with_models.inference.batch_chat_completion(
messages_batch=message_batch,
model_id=text_model_id,
)
assert len(response.batch) == len(qa_pairs)
for i, r in enumerate(response.batch):
print(f"response {i}: {r.completion_message.content}")
assert len(r.completion_message.content) > 0
assert qa_pairs[i]["answer"].lower() in r.completion_message.content.lower()

View file

@ -1,303 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
#
# Test plan:
#
# Types of input:
# - array of a string
# - array of a image (ImageContentItem, either URL or base64 string)
# - array of a text (TextContentItem)
# Types of output:
# - list of list of floats
# Params:
# - text_truncation
# - absent w/ long text -> error
# - none w/ long text -> error
# - absent w/ short text -> ok
# - none w/ short text -> ok
# - end w/ long text -> ok
# - end w/ short text -> ok
# - start w/ long text -> ok
# - start w/ short text -> ok
# - output_dimension
# - response dimension matches
# - task_type, only for asymmetric models
# - query embedding != passage embedding
# Negative:
# - long string
# - long text
#
# Todo:
# - negative tests
# - empty
# - empty list
# - empty string
# - empty text
# - empty image
# - long
# - large image
# - appropriate combinations
# - batch size
# - many inputs
# - invalid
# - invalid URL
# - invalid base64
#
# Notes:
# - use llama_stack_client fixture
# - use pytest.mark.parametrize when possible
# - no accuracy tests: only check the type of output, not the content
#
import pytest
from llama_stack_client import BadRequestError as LlamaStackBadRequestError
from llama_stack_client.types import EmbeddingsResponse
from llama_stack_client.types.shared.interleaved_content import (
ImageContentItem,
ImageContentItemImage,
ImageContentItemImageURL,
TextContentItem,
)
from openai import BadRequestError as OpenAIBadRequestError
from llama_stack.core.library_client import LlamaStackAsLibraryClient
DUMMY_STRING = "hello"
DUMMY_STRING2 = "world"
DUMMY_LONG_STRING = "NVDA " * 10240
DUMMY_TEXT = TextContentItem(text=DUMMY_STRING, type="text")
DUMMY_TEXT2 = TextContentItem(text=DUMMY_STRING2, type="text")
DUMMY_LONG_TEXT = TextContentItem(text=DUMMY_LONG_STRING, type="text")
# TODO(mf): add a real image URL and base64 string
DUMMY_IMAGE_URL = ImageContentItem(
image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image"
)
DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image")
SUPPORTED_PROVIDERS = {"remote::nvidia"}
MODELS_SUPPORTING_MEDIA = {}
MODELS_SUPPORTING_OUTPUT_DIMENSION = {"nvidia/llama-3.2-nv-embedqa-1b-v2"}
MODELS_REQUIRING_TASK_TYPE = {
"nvidia/llama-3.2-nv-embedqa-1b-v2",
"nvidia/nv-embedqa-e5-v5",
"nvidia/nv-embedqa-mistral-7b-v2",
"snowflake/arctic-embed-l",
}
MODELS_SUPPORTING_TASK_TYPE = MODELS_REQUIRING_TASK_TYPE
def default_task_type(model_id):
"""
Some models require a task type parameter. This provides a default value for
testing those models.
"""
if model_id in MODELS_REQUIRING_TASK_TYPE:
return {"task_type": "query"}
return {}
@pytest.mark.parametrize(
"contents",
[
[DUMMY_STRING, DUMMY_STRING2],
[DUMMY_TEXT, DUMMY_TEXT2],
],
ids=[
"list[string]",
"list[text]",
],
)
def test_embedding_text(llama_stack_client, embedding_model_id, contents, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"contents",
[
[DUMMY_IMAGE_URL, DUMMY_IMAGE_BASE64],
[DUMMY_IMAGE_URL, DUMMY_STRING, DUMMY_IMAGE_BASE64, DUMMY_TEXT],
],
ids=[
"list[url,base64]",
"list[url,string,base64,text]",
],
)
def test_embedding_image(llama_stack_client, embedding_model_id, contents, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
if embedding_model_id not in MODELS_SUPPORTING_MEDIA:
pytest.xfail(f"{embedding_model_id} doesn't support media")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"text_truncation",
[
"end",
"start",
],
)
@pytest.mark.parametrize(
"contents",
[
[DUMMY_LONG_TEXT],
[DUMMY_STRING],
],
ids=[
"long",
"short",
],
)
def test_embedding_truncation(
llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=contents,
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == 1
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"text_truncation",
[
None,
"none",
],
)
@pytest.mark.parametrize(
"contents",
[
[DUMMY_LONG_TEXT],
[DUMMY_LONG_STRING],
],
ids=[
"long-text",
"long-str",
],
)
def test_embedding_truncation_error(
llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
# Using LlamaStackClient from llama_stack_client will raise llama_stack_client.BadRequestError
# While using LlamaStackAsLibraryClient from llama_stack.distribution.library_client will raise the error that the backend raises
error_type = (
OpenAIBadRequestError
if isinstance(llama_stack_client, LlamaStackAsLibraryClient)
else LlamaStackBadRequestError
)
with pytest.raises(error_type):
llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_LONG_TEXT],
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)
def test_embedding_output_dimension(llama_stack_client, embedding_model_id, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
if embedding_model_id not in MODELS_SUPPORTING_OUTPUT_DIMENSION:
pytest.xfail(f"{embedding_model_id} doesn't support output_dimension")
base_response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], **default_task_type(embedding_model_id)
)
test_response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_STRING],
**default_task_type(embedding_model_id),
output_dimension=32,
)
assert len(base_response.embeddings[0]) != len(test_response.embeddings[0])
assert len(test_response.embeddings[0]) == 32
def test_embedding_task_type(llama_stack_client, embedding_model_id, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
if embedding_model_id not in MODELS_SUPPORTING_TASK_TYPE:
pytest.xfail(f"{embedding_model_id} doesn't support task_type")
query_embedding = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query"
)
document_embedding = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="document"
)
assert query_embedding.embeddings != document_embedding.embeddings
@pytest.mark.parametrize(
"text_truncation",
[
None,
"none",
"end",
"start",
],
)
def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_STRING],
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == 1
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"text_truncation",
[
"NONE",
"END",
"START",
"left",
"right",
],
)
def test_embedding_text_truncation_error(
llama_stack_client, embedding_model_id, text_truncation, inference_provider_type
):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
error_type = ValueError if isinstance(llama_stack_client, LlamaStackAsLibraryClient) else LlamaStackBadRequestError
with pytest.raises(error_type):
llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_STRING],
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)

View file

@ -9,6 +9,7 @@ import time
import unicodedata
import pytest
from pydantic import BaseModel
from ..test_cases.test_case import TestCase
@ -62,6 +63,14 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
def skip_if_doesnt_support_completions_logprobs(client_with_models, model_id):
provider_type = provider_from_model(client_with_models, model_id).provider_type
if provider_type in (
"remote::ollama", # logprobs is ignored
):
pytest.skip(f"Model {model_id} hosted by {provider_type} doesn't support /v1/completions logprobs.")
def skip_if_model_doesnt_support_suffix(client_with_models, model_id):
# To test `fim` (fill-in-the-middle) completion, we need to use a model that supports suffix.
# Use this to specifically test this API functionality.
@ -205,28 +214,6 @@ def test_openai_completion_streaming(llama_stack_client, client_with_models, tex
assert len(content_str) > 10
@pytest.mark.parametrize(
"prompt_logprobs",
[
1,
0,
],
)
def test_openai_completion_prompt_logprobs(llama_stack_client, client_with_models, text_model_id, prompt_logprobs):
skip_if_provider_isnt_vllm(client_with_models, text_model_id)
prompt = "Hello, world!"
response = llama_stack_client.completions.create(
model=text_model_id,
prompt=prompt,
stream=False,
prompt_logprobs=prompt_logprobs,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert len(choice.prompt_logprobs) > 0
def test_openai_completion_guided_choice(llama_stack_client, client_with_models, text_model_id):
skip_if_provider_isnt_vllm(client_with_models, text_model_id)
@ -518,3 +505,214 @@ def test_openai_chat_completion_non_streaming_with_file(openai_client, client_wi
message_content = response.choices[0].message.content.lower().strip()
normalized_content = _normalize_text(message_content)
assert "hello world" in normalized_content
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:stop_sequence",
],
)
def test_openai_completion_stop_sequence(client_with_models, openai_client, text_model_id, test_case):
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
stop="1963",
stream=False,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert "1963" not in choice.text
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
stop=["blathering", "1963"],
stream=False,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert "1963" not in choice.text
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_openai_completion_logprobs(client_with_models, openai_client, text_model_id, test_case):
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
skip_if_doesnt_support_completions_logprobs(client_with_models, text_model_id)
tc = TestCase(test_case)
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
logprobs=5,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert choice.text, "Response text should not be empty"
assert choice.logprobs, "Logprobs should not be empty"
logprobs = choice.logprobs
assert logprobs.token_logprobs, "Response tokens should not be empty"
assert len(logprobs.tokens) == len(logprobs.token_logprobs)
assert len(logprobs.token_logprobs) == len(logprobs.top_logprobs)
for i, (token, prob) in enumerate(zip(logprobs.tokens, logprobs.token_logprobs, strict=True)):
assert logprobs.top_logprobs[i][token] == prob
assert len(logprobs.top_logprobs[i]) == 5
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_openai_completion_logprobs_streaming(client_with_models, openai_client, text_model_id, test_case):
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
skip_if_doesnt_support_completions_logprobs(client_with_models, text_model_id)
tc = TestCase(test_case)
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
logprobs=3,
stream=True,
max_tokens=5,
)
for chunk in response:
choice = chunk.choices[0]
if choice.text: # if there's a token, we expect logprobs
assert choice.logprobs, "Logprobs should not be empty"
logprobs = choice.logprobs
assert logprobs.token_logprobs, "Response tokens should not be empty"
assert len(logprobs.tokens) == len(logprobs.token_logprobs)
assert len(logprobs.token_logprobs) == len(logprobs.top_logprobs)
for i, (token, prob) in enumerate(zip(logprobs.tokens, logprobs.token_logprobs, strict=True)):
assert logprobs.top_logprobs[i][token] == prob
assert len(logprobs.top_logprobs[i]) == 3
else: # no token, no logprobs
assert not choice.logprobs, "Logprobs should be empty"
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_openai_chat_completion_with_tools(openai_client, text_model_id, test_case):
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=False,
)
assert len(response.choices) == 1
assert len(response.choices[0].message.tool_calls) == 1
tool_call = response.choices[0].message.tool_calls[0]
assert tool_call.function.name == tc["tools"][0]["function"]["name"]
assert "location" in tool_call.function.arguments
assert tc["expected"]["location"] in tool_call.function.arguments
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_openai_chat_completion_with_tools_and_streaming(openai_client, text_model_id, test_case):
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=True,
)
# Accumulate tool calls from streaming chunks
tool_calls = []
for chunk in response:
if chunk.choices and chunk.choices[0].delta.tool_calls:
for i, tc_delta in enumerate(chunk.choices[0].delta.tool_calls):
while len(tool_calls) <= i:
tool_calls.append({"function": {"name": "", "arguments": ""}})
if tc_delta.function and tc_delta.function.name:
tool_calls[i]["function"]["name"] = tc_delta.function.name
if tc_delta.function and tc_delta.function.arguments:
tool_calls[i]["function"]["arguments"] += tc_delta.function.arguments
assert len(tool_calls) == 1
tool_call = tool_calls[0]
assert tool_call["function"]["name"] == tc["tools"][0]["function"]["name"]
assert "location" in tool_call["function"]["arguments"]
assert tc["expected"]["location"] in tool_call["function"]["arguments"]
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_openai_chat_completion_with_tool_choice_none(openai_client, text_model_id, test_case):
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="none",
stream=False,
)
assert len(response.choices) == 1
tool_calls = response.choices[0].message.tool_calls
assert tool_calls is None or len(tool_calls) == 0
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:structured_output",
],
)
def test_openai_chat_completion_structured_output(openai_client, text_model_id, test_case):
# Note: Skip condition may need adjustment for OpenAI client
class AnswerFormat(BaseModel):
first_name: str
last_name: str
year_of_birth: int
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
response_format={
"type": "json_schema",
"json_schema": {
"name": "AnswerFormat",
"schema": AnswerFormat.model_json_schema(),
},
},
stream=False,
)
print(response.choices[0].message.content)
answer = AnswerFormat.model_validate_json(response.choices[0].message.content)
expected = tc["expected"]
assert answer.first_name == expected["first_name"]
assert answer.last_name == expected["last_name"]
assert answer.year_of_birth == expected["year_of_birth"]
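The structured-output test above hinges on a round trip between a Pydantic model and the json_schema response format: the schema generated from the model constrains the completion, and the returned JSON is validated back into the model. A minimal sketch of that round trip with no inference call; the JSON string stands in for a model response and the values are illustrative.

from pydantic import BaseModel


class AnswerFormat(BaseModel):
    first_name: str
    last_name: str
    year_of_birth: int


response_format = {
    "type": "json_schema",
    "json_schema": {"name": "AnswerFormat", "schema": AnswerFormat.model_json_schema()},
}

raw = '{"first_name": "Michael", "last_name": "Jordan", "year_of_birth": 1963}'
answer = AnswerFormat.model_validate_json(raw)
assert answer.year_of_birth == 1963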

View file

@ -0,0 +1,77 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import base64
import pathlib
import pytest
@pytest.fixture
def image_path():
return pathlib.Path(__file__).parent / "dog.png"
@pytest.fixture
def base64_image_data(image_path):
return base64.b64encode(image_path.read_bytes()).decode("utf-8")
async def test_openai_chat_completion_image_url(openai_client, vision_model_id):
message = {
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {
"url": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
},
},
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = openai_client.chat.completions.create(
model=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.choices[0].message.content.lower().strip()
assert len(message_content) > 0
assert any(expected in message_content for expected in {"dog", "puppy", "pup"})
async def test_openai_chat_completion_image_data(openai_client, vision_model_id, base64_image_data):
message = {
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{base64_image_data}",
},
},
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = openai_client.chat.completions.create(
model=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.choices[0].message.content.lower().strip()
assert len(message_content) > 0
assert any(expected in message_content for expected in {"dog", "puppy", "pup"})

View file

@ -1,545 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from time import sleep
import pytest
from pydantic import BaseModel
from llama_stack.models.llama.sku_list import resolve_model
from ..test_cases.test_case import TestCase
PROVIDER_LOGPROBS_TOP_K = {"remote::together", "remote::fireworks", "remote::vllm"}
def skip_if_model_doesnt_support_completion(client_with_models, model_id):
models = {m.identifier: m for m in client_with_models.models.list()}
models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
provider_id = models[model_id].provider_id
providers = {p.provider_id: p for p in client_with_models.providers.list()}
provider = providers[provider_id]
if (
provider.provider_type
in (
"remote::openai",
"remote::anthropic",
"remote::gemini",
"remote::vertexai",
"remote::groq",
"remote::sambanova",
"remote::azure",
)
or "openai-compat" in provider.provider_type
):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support completion")
def skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, model_id):
models = {m.identifier: m for m in client_with_models.models.list()}
models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
provider_id = models[model_id].provider_id
providers = {p.provider_id: p for p in client_with_models.providers.list()}
provider = providers[provider_id]
if provider.provider_type in ("remote::sambanova", "remote::azure", "remote::watsonx"):
pytest.skip(
f"Model {model_id} hosted by {provider.provider_type} doesn't support json_schema structured output"
)
def get_llama_model(client_with_models, model_id):
models = {}
for m in client_with_models.models.list():
models[m.identifier] = m
models[m.provider_resource_id] = m
assert model_id in models, f"Model {model_id} not found"
model = models[model_id]
ids = (model.identifier, model.provider_resource_id)
for mid in ids:
if resolve_model(mid):
return mid
return model.metadata.get("llama_model", None)
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:sanity",
],
)
def test_text_completion_non_streaming(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=False,
model_id=text_model_id,
sampling_params={
"max_tokens": 50,
},
)
assert len(response.content) > 10
# assert "blue" in response.content.lower().strip()
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:sanity",
],
)
def test_text_completion_streaming(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=True,
model_id=text_model_id,
sampling_params={
"max_tokens": 50,
},
)
streamed_content = [chunk.delta for chunk in response]
content_str = "".join(streamed_content).lower().strip()
# assert "blue" in content_str
assert len(content_str) > 10
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:stop_sequence",
],
)
def test_text_completion_stop_sequence(client_with_models, text_model_id, inference_provider_type, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
# This is only supported/tested for remote vLLM: https://github.com/meta-llama/llama-stack/issues/1771
if inference_provider_type != "remote::vllm":
pytest.xfail(f"{inference_provider_type} doesn't support 'stop' parameter yet")
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=True,
model_id=text_model_id,
sampling_params={
"max_tokens": 50,
"stop": ["1963"],
},
)
streamed_content = [chunk.delta for chunk in response]
content_str = "".join(streamed_content).lower().strip()
assert "1963" not in content_str
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_text_completion_log_probs_non_streaming(client_with_models, text_model_id, inference_provider_type, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
if inference_provider_type not in PROVIDER_LOGPROBS_TOP_K:
pytest.xfail(f"{inference_provider_type} doesn't support log probs yet")
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=False,
model_id=text_model_id,
sampling_params={
"max_tokens": 5,
},
logprobs={
"top_k": 1,
},
)
assert response.logprobs, "Logprobs should not be empty"
assert 1 <= len(response.logprobs) <= 5 # each token has 1 logprob and here max_tokens=5
assert all(len(logprob.logprobs_by_token) == 1 for logprob in response.logprobs)
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_text_completion_log_probs_streaming(client_with_models, text_model_id, inference_provider_type, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
if inference_provider_type not in PROVIDER_LOGPROBS_TOP_K:
pytest.xfail(f"{inference_provider_type} doesn't support log probs yet")
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=True,
model_id=text_model_id,
sampling_params={
"max_tokens": 5,
},
logprobs={
"top_k": 1,
},
)
streamed_content = list(response)
for chunk in streamed_content:
if chunk.delta: # if there's a token, we expect logprobs
assert chunk.logprobs, "Logprobs should not be empty"
assert all(len(logprob.logprobs_by_token) == 1 for logprob in chunk.logprobs)
else: # no token, no logprobs
assert not chunk.logprobs, "Logprobs should be empty"
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:structured_output",
],
)
def test_text_completion_structured_output(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, text_model_id)
class AnswerFormat(BaseModel):
name: str
year_born: str
year_retired: str
tc = TestCase(test_case)
user_input = tc["user_input"]
response = client_with_models.inference.completion(
model_id=text_model_id,
content=user_input,
stream=False,
sampling_params={
"max_tokens": 50,
},
response_format={
"type": "json_schema",
"json_schema": AnswerFormat.model_json_schema(),
},
)
answer = AnswerFormat.model_validate_json(response.content)
expected = tc["expected"]
assert answer.name == expected["name"]
assert answer.year_born == expected["year_born"]
assert answer.year_retired == expected["year_retired"]
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:non_streaming_01",
"inference:chat_completion:non_streaming_02",
],
)
def test_text_chat_completion_non_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
question = tc["question"]
expected = tc["expected"]
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=[
{
"role": "user",
"content": question,
}
],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
assert len(message_content) > 0
assert expected.lower() in message_content
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:streaming_01",
"inference:chat_completion:streaming_02",
],
)
def test_text_chat_completion_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
question = tc["question"]
expected = tc["expected"]
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=[{"role": "user", "content": question}],
stream=True,
timeout=120, # Increase timeout to 2 minutes for large conversation history
)
streamed_content = [str(chunk.event.delta.text.lower().strip()) for chunk in response]
assert len(streamed_content) > 0
assert expected.lower() in "".join(streamed_content)
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_calling_and_non_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=False,
)
# some models can return content for the response in addition to the tool call
assert response.completion_message.role == "assistant"
assert len(response.completion_message.tool_calls) == 1
assert response.completion_message.tool_calls[0].tool_name == tc["tools"][0]["tool_name"]
assert response.completion_message.tool_calls[0].arguments == tc["expected"]
# Will extract streamed text and separate it from tool invocation content
# The returned tool invocation content will be a string so it's easy to compare with the expected value
# e.g. "[get_weather, {'location': 'San Francisco, CA'}]"
def extract_tool_invocation_content(response):
tool_invocation_content: str = ""
for chunk in response:
delta = chunk.event.delta
if delta.type == "tool_call" and delta.parse_status == "succeeded":
call = delta.tool_call
tool_invocation_content += f"[{call.tool_name}, {call.arguments}]"
return tool_invocation_content
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_calling_and_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=True,
)
tool_invocation_content = extract_tool_invocation_content(response)
expected_tool_name = tc["tools"][0]["tool_name"]
expected_argument = tc["expected"]
assert tool_invocation_content == f"[{expected_tool_name}, {expected_argument}]"
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_choice_required(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_config={
"tool_choice": "required",
},
stream=True,
)
tool_invocation_content = extract_tool_invocation_content(response)
expected_tool_name = tc["tools"][0]["tool_name"]
expected_argument = tc["expected"]
assert tool_invocation_content == f"[{expected_tool_name}, {expected_argument}]"
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_choice_none(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_config={"tool_choice": "none"},
stream=True,
)
tool_invocation_content = extract_tool_invocation_content(response)
assert tool_invocation_content == ""
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:structured_output",
],
)
def test_text_chat_completion_structured_output(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, text_model_id)
class NBAStats(BaseModel):
year_for_draft: int
num_seasons_in_nba: int
class AnswerFormat(BaseModel):
first_name: str
last_name: str
year_of_birth: int
nba_stats: NBAStats
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
response_format={
"type": "json_schema",
"json_schema": AnswerFormat.model_json_schema(),
},
stream=False,
)
answer = AnswerFormat.model_validate_json(response.completion_message.content)
expected = tc["expected"]
assert answer.first_name == expected["first_name"]
assert answer.last_name == expected["last_name"]
assert answer.year_of_birth == expected["year_of_birth"]
assert answer.nba_stats.num_seasons_in_nba == expected["num_seasons_in_nba"]
assert answer.nba_stats.year_for_draft == expected["year_for_draft"]
@pytest.mark.parametrize("streaming", [True, False])
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling_tools_absent",
],
)
def test_text_chat_completion_tool_calling_tools_not_in_request(
client_with_models, text_model_id, test_case, streaming
):
tc = TestCase(test_case)
# TODO: more dynamic lookup on tool_prompt_format for model family
tool_prompt_format = "json" if "3.1" in text_model_id else "python_list"
request = {
"model_id": text_model_id,
"messages": tc["messages"],
"tools": tc["tools"],
"tool_choice": "auto",
"tool_prompt_format": tool_prompt_format,
"stream": streaming,
}
response = client_with_models.inference.chat_completion(**request)
if streaming:
for chunk in response:
delta = chunk.event.delta
if delta.type == "tool_call" and delta.parse_status == "succeeded":
assert delta.tool_call.tool_name == "get_object_namespace_list"
if delta.type == "tool_call" and delta.parse_status == "failed":
# expect raw message that failed to parse in tool_call
assert isinstance(delta.tool_call, str)
assert len(delta.tool_call) > 0
else:
for tc in response.completion_message.tool_calls:
assert tc.tool_name == "get_object_namespace_list"
@pytest.mark.parametrize(
"test_case",
[
# Tests if the model can handle simple messages like "Hi" or
# a message unrelated to one of the tool calls
"inference:chat_completion:text_then_tool",
# Tests if the model can do full tool call with responses correctly
"inference:chat_completion:tool_then_answer",
# Tests if model can generate multiple params and
# read outputs correctly
"inference:chat_completion:array_parameter",
],
)
def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, text_model_id, test_case):
"""This test tests the model's tool calling loop in various scenarios"""
if "llama-4" not in text_model_id.lower() and "llama4" not in text_model_id.lower():
pytest.xfail("Not tested for non-llama4 models yet")
tc = TestCase(test_case)
messages = []
# keep going until either
# 1. we have messages to test in multi-turn
# 2. no messages left but the last message is a tool response
while len(tc["messages"]) > 0 or (len(messages) > 0 and messages[-1]["role"] == "tool"):
# do not take new messages if last message is tool response
if len(messages) == 0 or messages[-1]["role"] != "tool":
new_messages = tc["messages"].pop(0)
messages += new_messages
# pprint(messages)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=messages,
tools=tc["tools"],
stream=False,
sampling_params={
"strategy": {
"type": "top_p",
"top_p": 0.9,
"temperature": 0.6,
}
},
)
op_msg = response.completion_message
messages.append(op_msg.model_dump())
# print(op_msg)
assert op_msg.role == "assistant"
expected = tc["expected"].pop(0)
assert len(op_msg.tool_calls) == expected["num_tool_calls"]
if expected["num_tool_calls"] > 0:
assert op_msg.tool_calls[0].tool_name == expected["tool_name"]
assert op_msg.tool_calls[0].arguments == expected["tool_arguments"]
tool_response = tc["tool_responses"].pop(0)
messages.append(
# Tool Response Message
{
"role": "tool",
"call_id": op_msg.tool_calls[0].call_id,
"content": tool_response["response"],
}
)
else:
actual_answer = op_msg.content.lower()
# pprint(actual_answer)
assert expected["answer"] in actual_answer
# sleep to avoid rate limit
sleep(1)

View file

@ -25,16 +25,19 @@ def base64_image_data(image_path):
return base64.b64encode(image_path.read_bytes()).decode("utf-8")
@pytest.fixture
def base64_image_url(base64_image_data):
return f"data:image/png;base64,{base64_image_data}"
def test_image_chat_completion_non_streaming(client_with_models, vision_model_id):
message = {
"role": "user",
"content": [
{
"type": "image",
"image": {
"url": {
"uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
},
"type": "image_url",
"image_url": {
"url": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
},
},
{
@ -43,12 +46,12 @@ def test_image_chat_completion_non_streaming(client_with_models, vision_model_id
},
],
}
response = client_with_models.inference.chat_completion(
model_id=vision_model_id,
response = client_with_models.chat.completions.create(
model=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
message_content = response.choices[0].message.content.lower().strip()
assert len(message_content) > 0
assert any(expected in message_content for expected in {"dog", "puppy", "pup"})
@ -68,8 +71,13 @@ def multi_image_data():
return encoded_files
@pytest.fixture
def multi_image_url(multi_image_data):
return [f"data:image/jpeg;base64,{data}" for data in multi_image_data]
@pytest.mark.parametrize("stream", [True, False])
def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_data, stream):
def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_url, stream):
supported_models = ["llama-4", "gpt-4o", "llama4"]
if not any(model in vision_model_id.lower() for model in supported_models):
pytest.skip(
@ -81,15 +89,15 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
"role": "user",
"content": [
{
"type": "image",
"image": {
"data": multi_image_data[0],
"type": "image_url",
"image_url": {
"url": multi_image_url[0],
},
},
{
"type": "image",
"image": {
"data": multi_image_data[1],
"type": "image_url",
"image_url": {
"url": multi_image_url[1],
},
},
{
@ -99,17 +107,17 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
],
},
]
response = client_with_models.inference.chat_completion(
model_id=vision_model_id,
response = client_with_models.chat.completions.create(
model=vision_model_id,
messages=messages,
stream=stream,
)
if stream:
message_content = ""
for chunk in response:
message_content += chunk.event.delta.text
message_content += chunk.choices[0].delta.content
else:
message_content = response.completion_message.content
message_content = response.choices[0].message.content
assert len(message_content) > 0
assert any(expected in message_content.lower().strip() for expected in {"bedroom"}), message_content
@ -125,17 +133,17 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
"role": "user",
"content": [
{
"type": "image",
"image": {
"data": multi_image_data[2],
"type": "image_url",
"image_url": {
"url": multi_image_data[2],
},
},
{"type": "text", "text": "How about this one?"},
],
},
)
response = client_with_models.inference.chat_completion(
model_id=vision_model_id,
response = client_with_models.chat.completions.create(
model=vision_model_id,
messages=messages,
stream=stream,
)
@ -144,7 +152,7 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
for chunk in response:
message_content += chunk.event.delta.text
else:
message_content = response.completion_message.content
message_content = response.choices[0].message.content
assert len(message_content) > 0
assert any(expected in message_content.lower().strip() for expected in {"sword", "shield"}), message_content
@ -154,11 +162,9 @@ def test_image_chat_completion_streaming(client_with_models, vision_model_id):
"role": "user",
"content": [
{
"type": "image",
"image": {
"url": {
"uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
},
"type": "image_url",
"image_url": {
"url": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
},
},
{
@ -167,23 +173,23 @@ def test_image_chat_completion_streaming(client_with_models, vision_model_id):
},
],
}
response = client_with_models.inference.chat_completion(
model_id=vision_model_id,
response = client_with_models.chat.completions.create(
model=vision_model_id,
messages=[message],
stream=True,
)
streamed_content = ""
for chunk in response:
streamed_content += chunk.event.delta.text.lower()
streamed_content += chunk.choices[0].delta.content.lower()
assert len(streamed_content) > 0
assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
def test_image_chat_completion_base64(client_with_models, vision_model_id, base64_image_data):
def test_image_chat_completion_base64(client_with_models, vision_model_id, base64_image_url):
image_spec = {
"type": "image",
"image": {
"data": base64_image_data,
"type": "image_url",
"image_url": {
"url": base64_image_url,
},
}
@ -197,10 +203,10 @@ def test_image_chat_completion_base64(client_with_models, vision_model_id, base6
},
],
}
response = client_with_models.inference.chat_completion(
model_id=vision_model_id,
response = client_with_models.chat.completions.create(
model=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
message_content = response.choices[0].message.content.lower().strip()
assert len(message_content) > 0
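After this migration every vision test builds OpenAI-style content parts: type "image_url" carrying either a remote URL or a base64 data URL, sent through chat.completions.create. A compact sketch of both forms outside the test harness; the client endpoint, API key, and model id are placeholders, not values from this commit.

import base64
import pathlib

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")  # placeholder endpoint
image_b64 = base64.b64encode(pathlib.Path("dog.png").read_bytes()).decode("utf-8")

message = {
    "role": "user",
    "content": [
        {
            "type": "image_url",
            "image_url": {
                "url": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
            },
        },
        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
        {"type": "text", "text": "Describe what is in these images."},
    ],
}

response = client.chat.completions.create(model="<vision-model-id>", messages=[message], stream=False)
print(response.choices[0].message.content)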

View file

@ -14,6 +14,13 @@ from . import skip_in_github_actions
# LLAMA_STACK_CONFIG="nvidia" pytest -v tests/integration/providers/nvidia/test_datastore.py
@pytest.fixture(autouse=True)
def skip_if_no_nvidia_provider(llama_stack_client):
provider_types = {p.provider_type for p in llama_stack_client.providers.list() if p.api == "datasetio"}
if "remote::nvidia" not in provider_types:
pytest.skip("datasetio=remote::nvidia provider not configured, skipping")
# nvidia provider only
@skip_in_github_actions
@pytest.mark.parametrize(

View file

@ -0,0 +1,167 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"greet_everyone\",\n \"description\": \"\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"url\"],\n \"properties\": {\n \"url\": {\n \"type\": \"string\",\n \"description\": \"\"\n }\n }\n }\n },\n {\n \"name\": \"get_boiling_point\",\n \"description\": \"\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n \",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"liquid_name\"],\n \"properties\": {\n \"liquid_name\": {\n \"type\": \"string\",\n \"description\": \"\"\n },\n \"celsius\": {\n \"type\": \"boolean\",\n \"description\": \"\",\n \"default\": \"True\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[greet_everyone(url=\"world\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.663224Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "How",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.706706Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " can",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.751075Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " I",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.794187Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " assist",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.837831Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " you",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.879926Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " further",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.92182Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "?",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.963339Z",
"done": true,
"done_reason": "stop",
"total_duration": 492973041,
"load_duration": 103979375,
"prompt_eval_count": 482,
"prompt_eval_duration": 87032041,
"eval_count": 8,
"eval_duration": 300586375,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true
}
}

View file

@@ -0,0 +1,31 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": [],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 0,
"total_tokens": 0,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,89 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
},
{
"role": "user",
"content": "Please give me information about Michael Jordan."
}
],
"response_format": {
"type": "json_schema",
"json_schema": {
"name": "AnswerFormat",
"schema": {
"properties": {
"first_name": {
"title": "First Name",
"type": "string"
},
"last_name": {
"title": "Last Name",
"type": "string"
},
"year_of_birth": {
"title": "Year Of Birth",
"type": "integer"
}
},
"required": [
"first_name",
"last_name",
"year_of_birth"
],
"title": "AnswerFormat",
"type": "object"
}
}
},
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-433",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "{\"first_name\": \"Michael\", \"last_name\": \"Jordan\", \"year_of_birth\": 1963}\n\n \t\t\t\t\t\t\t\t\t\t\t \t\t ",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758979490,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 31,
"prompt_tokens": 60,
"total_tokens": 91,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
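The recording above captures a structured-output request against the local Ollama OpenAI-compatible endpoint. A minimal sketch of the kind of client call that produces it (assuming the standard openai Python package; the endpoint, dummy API key, and variable names here are illustrative, not the exact test code):

from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="none")  # assumed local Ollama endpoint
response = client.chat.completions.create(
    model="llama3.2:3b-instruct-fp16",
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons.",
        },
        {"role": "user", "content": "Please give me information about Michael Jordan."},
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "AnswerFormat",
            "schema": {
                "type": "object",
                "title": "AnswerFormat",
                "properties": {
                    "first_name": {"type": "string", "title": "First Name"},
                    "last_name": {"type": "string", "title": "Last Name"},
                    "year_of_birth": {"type": "integer", "title": "Year Of Birth"},
                },
                "required": ["first_name", "last_name", "year_of_birth"],
            },
        },
    },
    stream=False,
)
# The model is constrained to emit JSON matching the schema, as seen in the recorded reply.
print(response.choices[0].message.content)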

View file

@@ -0,0 +1,31 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": [],
"encoding_format": "base64"
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 0,
"total_tokens": 0,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,316 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "What's the name of the Sun in latin?"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": "The Latin",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " name",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " for",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " Sun",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " \"",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": "Sol",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": "\".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 11,
"prompt_tokens": 20,
"total_tokens": 31,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}
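The streamed recording above is a sequence of chat.completion.chunk objects; the answer is recovered by concatenating each chunk's delta.content, and the closing chunk carries the usage totals. A minimal consumption sketch (assuming the standard openai Python client pointed at Fireworks, with an API key taken from an assumed environment variable; illustrative only):

import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api.fireworks.ai/inference/v1",
    api_key=os.environ["FIREWORKS_API_KEY"],  # assumed env var
)
stream = client.chat.completions.create(
    model="accounts/fireworks/models/llama-v3p1-8b-instruct",
    messages=[{"role": "user", "content": "What's the name of the Sun in latin?"}],
    stream=True,
)
parts = []
for chunk in stream:
    # Each chunk may carry a partial delta; the final chunk has finish_reason "stop".
    if chunk.choices and chunk.choices[0].delta.content:
        parts.append(chunk.choices[0].delta.content)
print("".join(parts))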

View file

@@ -0,0 +1,44 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Hello, world!",
"logprobs": false,
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-74",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Hello! How can I assist you today?"
}
],
"created": 1758975636,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 10,
"prompt_tokens": 29,
"total_tokens": 39,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because one or more lines are too long

View file

@@ -0,0 +1,92 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"stream": false,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-761",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "call_cj8ownwc",
"function": {
"arguments": "{\"location\":\"San Francisco, CA\"}",
"name": "get_weather"
},
"type": "function",
"index": 0
}
]
}
}
],
"created": 1758975113,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 18,
"prompt_tokens": 185,
"total_tokens": 203,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
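This recording exercises OpenAI-style tool calling against the local Ollama endpoint: the model responds with a get_weather tool call instead of text. A minimal sketch of an equivalent request (assuming the standard openai Python client; the endpoint and dummy API key are illustrative, not the exact test setup):

from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="none")  # assumed local Ollama endpoint
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state (both required), e.g. San Francisco, CA.",
                    }
                },
                "required": ["location"],
            },
        },
    }
]
response = client.chat.completions.create(
    model="llama3.2:3b-instruct-fp16",
    messages=[
        {"role": "system", "content": "Pretend you are a weather assistant."},
        {"role": "user", "content": "What's the weather like in San Francisco, CA?"},
    ],
    tools=tools,
    tool_choice="auto",
    stream=False,
)
# Any tool calls show up on the first choice's message, as in the recorded reply.
for call in response.choices[0].message.tool_calls or []:
    print(call.function.name, call.function.arguments)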

View file

@@ -0,0 +1,44 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Hello, world!",
"logprobs": true,
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-809",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Hello! It's nice to meet you. Is there anything I can help you with or would you like to chat?"
}
],
"created": 1758975633,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 25,
"prompt_tokens": 29,
"total_tokens": 54,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,550 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "What is the name of the US captial?"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": "The name",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " US",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " capital",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " Washington",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ",",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " D",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ".C",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " (",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": "short",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " for",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " District",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " Columbia",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ").",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 20,
"prompt_tokens": 20,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}

View file

@@ -0,0 +1,60 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-123",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! As of my knowledge cutoff on December 15th, I have the latest information for you. However, please note that my data may not be entirely up-to-date.\n\nCurrently, and based on historical climate patterns, it appears to be a partly cloudy day with mild temperatures in San Francisco, CA. Expect a temperature range of around 48\u00b0F (9\u00b0C) to 54\u00b0F (12\u00b0C). It's likely to be a breezy day, with winds blowing at about 13 mph (21 km/h).\n\nHowever, if I were to look into more recent weather patterns or forecasts, I would recommend checking the latest conditions directly from reliable sources such as the National Weather Service or local news outlets for more accurate and up-to-date information.\n\nPlease let me know how I can further assist you.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758978071,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 163,
"prompt_tokens": 45,
"total_tokens": 208,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": "Test dimensions parameter",
"encoding_format": "float",
"dimensions": 16
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.9296875,
5.1875,
-2.140625,
0.171875,
-2.25,
-0.8359375,
-0.828125,
1.15625,
2.328125,
-1.0078125,
-3.0,
4.09375,
0.8359375,
0.1015625,
2.015625,
-1.0859375
],
"index": 0,
"object": "embedding",
"raw_output": null
}
],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,39 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": "Test dimensions parameter",
"encoding_format": "base64",
"dimensions": 16
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": "AABuvwAApkAAAAnAAAAwPgAAEMAAAFa/AABUvwAAlD8AABVAAACBvwAAQMAAAINAAABWPwAA0D0AAAFAAACLvw==",
"index": 0,
"object": "embedding",
"raw_output": null
}
],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}
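The two embedding recordings above exercise the dimensions parameter (16-dimensional output) in float and base64 encodings. A minimal sketch of the corresponding request (assuming the standard openai Python client with the public Fireworks base URL and an API key from an assumed environment variable; illustrative only):

import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api.fireworks.ai/inference/v1",
    api_key=os.environ["FIREWORKS_API_KEY"],  # assumed env var
)
resp = client.embeddings.create(
    model="accounts/fireworks/models/qwen3-embedding-8b",
    input="Test dimensions parameter",
    dimensions=16,
    encoding_format="float",
)
print(len(resp.data[0].embedding))  # expected: 16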

View file

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "1d64ff81-b7c4-40c6-9509-cca71759da3e",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920401,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 26,
"prompt_tokens": 14,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,347 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"greet_everyone\",\n \"description\": \"\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"url\"],\n \"properties\": {\n \"url\": {\n \"type\": \"string\",\n \"description\": \"\"\n }\n }\n }\n },\n {\n \"name\": \"get_boiling_point\",\n \"description\": \"\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n \",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"liquid_name\"],\n \"properties\": {\n \"liquid_name\": {\n \"type\": \"string\",\n \"description\": \"\"\n },\n \"celsius\": {\n \"type\": \"boolean\",\n \"description\": \"\",\n \"default\": \"True\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[greet_everyone(url=\"world\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHow can I assist you further?<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the boiling point of polyjuice? Use tools to answer.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.177453Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.220271Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "get",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.261232Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_bo",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.302818Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "iling",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.344343Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_point",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.386025Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.42778Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "liquid",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.469673Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_name",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.512543Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "='",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.554479Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "poly",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.597092Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "ju",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.639581Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "ice",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.683223Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "',",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.72556Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " c",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.768012Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "elsius",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.8098Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=True",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.851578Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ")]",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.893693Z",
"done": true,
"done_reason": "stop",
"total_duration": 885274541,
"load_duration": 99578333,
"prompt_eval_count": 514,
"prompt_eval_duration": 67915875,
"eval_count": 18,
"eval_duration": 717086791,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true
}
}

View file

@@ -0,0 +1,74 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
}
],
"stream": false,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the weather in a given city",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The city to get the weather for"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "0fe94e7d-f25b-4843-ba0a-e402e0764830",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "I can\u2019t help with that. If you're looking for current weather information, I recommend checking a weather website or app, such as AccuWeather or Weather.com. Is there anything else I can help you with?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920402,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 45,
"prompt_tokens": 27,
"total_tokens": 72,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,55 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test trace 0"
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-272",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "I'm happy to help you with a test. Since we are in the middle of a text-based conversation, I'll do my best to simulate a simple test tracing process.\n\n**Trace Test Results**\n\nTo perform this test, please follow these steps:\n\n1. Type \"test\" on command mode.\n2. Press Enter.\n\nNow, let's start tracing...\n\nTest Tracing Results:\nTest Case: General Functions\nTest Case Result: PASS\n\nSystem Response:\n\n```\n# System Boot Time: 2023-10-13T14:30:00\n# CPU Temperature: 35\u00b0C\n# Disk Space Available: 80%\n```\n\nNext Steps?\n\nType 'done' to exit the test, or 'run' for more tests.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758978134,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 152,
"prompt_tokens": 29,
"total_tokens": 181,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,44 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963",
"stop": "1963",
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-183",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Michael Jordan was born in the year of "
}
],
"created": 1758978053,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 11,
"prompt_tokens": 48,
"total_tokens": 59,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,112 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"stream": true,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-634",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_wubm4yax",
"function": {
"arguments": "{\"location\":\"San Francisco, CA\"}",
"name": "get_weather"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758975115,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-634",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 1758975115,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
}
}

View file

@@ -0,0 +1,47 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963",
"stop": [
"blathering",
"1963"
],
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-381",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Michael Jordan was born in the year of "
}
],
"created": 1758978056,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 11,
"prompt_tokens": 48,
"total_tokens": 59,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,55 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test trace 1"
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-122",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "It appears you're trying to initiate a conversation or test the functionality of this AI system. I'm happy to chat with you!\n\nWould you like to:\nA) Ask me a question on a specific topic\nB) Engage in a conversational dialogue on a topic of your choice\nC) Play a text-based game\nD) Test my language understanding capabilities\n\nPlease respond with the letter of your preferred activity.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758978142,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 85,
"prompt_tokens": 29,
"total_tokens": 114,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -13,22 +13,23 @@
"__data__": {
"models": [
{
"model": "llama3.2-vision:11b",
"name": "llama3.2-vision:11b",
"digest": "6f2f9757ae97e8a3f8ea33d6adb2b11d93d9a35bef277cd2c0b1b5af8e8d0b1e",
"expires_at": "2025-09-03T11:51:35.966409-07:00",
"size": 12401209008,
"size_vram": 12401209008,
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-09-27T11:54:56.718552-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "mllama",
"family": "llama",
"families": [
"mllama"
"llama"
],
"parameter_size": "10.7B",
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
"context_length": 4096
}
]
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,43 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "1bbb8db5-63e5-40cd-8ffe-59e0e88bf8f0",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": "4. At the beginning of the year, a woman has $5,000"
}
],
"created": 1758920353,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 16,
"prompt_tokens": 25,
"total_tokens": 41,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

View file

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Which planet has rings around it with a name starting with letter S?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "26632ea9-3481-419d-bc0d-83c177257bc4",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "There are two planets in our solar system with ring systems that have names starting with the letter S:\n\n1. **Saturn** - Its ring system is one of the most iconic and well-known in our solar system. The rings are made up of ice and rock particles that range in size from tiny dust grains to massive boulders.\n2. **Saturn's moon** - The ring system of **Saturn's moon, Rhea**, is sometimes referred to as a \"ring system\" even though it's much smaller and less prominent than Saturn's. However, it's worth noting that Rhea's ring system is not as well-known as Saturn's.\n\nIf you're looking for a planet with a ring system that starts with the letter S and is not a moon, then the answer is Saturn!",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920397,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 164,
"prompt_tokens": 24,
"total_tokens": 188,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,185 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"greet_everyone\",\n \"description\": \"\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"url\"],\n \"properties\": {\n \"url\": {\n \"type\": \"string\",\n \"description\": \"\"\n }\n }\n }\n },\n {\n \"name\": \"get_boiling_point\",\n \"description\": \"\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n \",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"liquid_name\"],\n \"properties\": {\n \"liquid_name\": {\n \"type\": \"string\",\n \"description\": \"\"\n },\n \"celsius\": {\n \"type\": \"boolean\",\n \"description\": \"\",\n \"default\": \"True\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.034121Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[g",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.07569Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "reet",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.116927Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_every",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.159755Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "one",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.201675Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(url",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.243056Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=\"",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.284651Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "world",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.326276Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\")]",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.367959Z",
"done": true,
"done_reason": "stop",
"total_duration": 5381441291,
"load_duration": 4112439791,
"prompt_eval_count": 459,
"prompt_eval_duration": 932587833,
"eval_count": 9,
"eval_duration": 334328250,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true
}
}
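
The recording above captures a raw Ollama `/api/generate` stream in which the model emits a tool call one fragment at a time. As a minimal sketch (an assumed helper, not code from this diff), the fragments can be folded back into the final completion by concatenating each chunk's `response` field:

```python
def reassemble_ollama_stream(chunks):
    """Concatenate the 'response' field of each recorded GenerateResponse chunk."""
    text = ""
    for chunk in chunks:
        data = chunk["__data__"]
        text += data.get("response") or ""
        if data.get("done"):
            break
    return text

# Applied to the recording above, this yields the raw tool-call string:
#   [greet_everyone(url="world")]
```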

View file

@@ -0,0 +1,706 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": "Hello!",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " It",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": "'s",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " nice",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " meet",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " Is",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " there",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " something",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " I",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " can",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " help",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " with",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": ",",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " or",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " would",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " like",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " chat",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": "?",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 26,
"prompt_tokens": 14,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}
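
The chunks above are a Fireworks chat-completion stream in OpenAI wire format. A minimal, illustrative sketch (not code from this diff) of how a streaming client would fold the recorded deltas back into the assistant message:

```python
def collect_chat_stream(chunks):
    """Accumulate delta.content across recorded ChatCompletionChunk entries."""
    content, finish_reason = "", None
    for chunk in chunks:
        choice = chunk["__data__"]["choices"][0]
        piece = choice["delta"].get("content")
        if piece:
            content += piece
        if choice["finish_reason"]:
            finish_reason = choice["finish_reason"]
    return content, finish_reason

# For this recording: ("Hello! It's nice to meet you. Is there something I can
# help you with, or would you like to chat?", "stop")
```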

View file

@@ -0,0 +1,996 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
"max_tokens": 50,
"stream": true,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " a"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " __________________"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "_____"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "##"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Step"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " "
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "1"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ":"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Identify"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " flower"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " mentioned"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " in"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " sentence"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "The"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " sentence"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " mentions"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " \""
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "vio"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\"\n\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "##"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Step"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " "
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "2"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ":"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Determine"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " flower"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " v"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "io"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " are"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "V"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "io"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " are"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " a"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": ""
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 50,
"prompt_tokens": 25,
"total_tokens": 75,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}
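
This recording is the legacy text-completion variant of the same streaming pattern: each chunk carries a `text` fragment, and the stream ends with `finish_reason: "length"` because the request capped `max_tokens` at 50. A one-line illustrative sketch for joining such a recording:

```python
def collect_text_stream(chunks):
    """Join the 'text' field of each recorded Completion chunk in order."""
    return "".join(c["__data__"]["choices"][0]["text"] for c in chunks)
```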

File diff suppressed because it is too large

View file

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Which planet do humans live on?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "0fd60cd7-dc72-45b7-808c-4da91de80093",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Humans live on a planet called Earth.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920388,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 9,
"prompt_tokens": 17,
"total_tokens": 26,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
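
For comparison with the streaming cases, this is a plain non-streaming chat completion. A hedged sketch of the equivalent live call using the standard `openai` client (the base URL and API key below are placeholders, not values taken from this diff):

```python
from openai import OpenAI

# Placeholder endpoint/credentials; adjust to your Fireworks account.
client = OpenAI(
    base_url="https://api.fireworks.ai/inference/v1",
    api_key="YOUR_FIREWORKS_API_KEY",
)
resp = client.chat.completions.create(
    model="accounts/fireworks/models/llama-v3p1-8b-instruct",
    messages=[{"role": "user", "content": "Which planet do humans live on?"}],
    stream=False,
)
print(resp.choices[0].message.content)  # e.g. "Humans live on a planet called Earth."
```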

View file

@@ -0,0 +1,527 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-1-dev-fp8",
"created": 1729532889,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": false,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/tvergho-87e44d/models/debatecards-70b-ft-3epoch-dpo-v2",
"created": 1743381121,
"object": "model",
"owned_by": "tvergho-87e44d",
"kind": "HF_PEFT_ADDON",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-kontext-max",
"created": 1750714611,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-kontext-pro",
"created": 1750488264,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b",
"created": 1748467427,
"object": "model",
"owned_by": "sentientfoundation-serverless",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3",
"created": 1735576668,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new",
"created": 1739563474,
"object": "model",
"owned_by": "sentientfoundation",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/gpt-oss-120b",
"created": 1754345600,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
"created": 1753211090,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b-thinking-2507",
"created": 1753916446,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
"created": 1753124424,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b-thinking-2507",
"created": 1753455434,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-embedding-8b",
"created": 1755707090,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 40960
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3-0324",
"created": 1742827220,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3p1-terminus",
"created": 1758586241,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/kimi-k2-instruct",
"created": 1752259096,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/gpt-oss-20b",
"created": 1754345466,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"created": 1743878495,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": true,
"context_length": 1048576
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct",
"created": 1754063588,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"created": 1733442103,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct",
"created": 1743392739,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false,
"context_length": 128000
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b",
"created": 1745885249,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/glm-4p5-air",
"created": 1754089426,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1",
"created": 1737397673,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"created": 1721692808,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1-basic",
"created": 1742306746,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3p1",
"created": 1755758988,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-1-schnell-fp8",
"created": 1729535376,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": false,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/glm-4p5",
"created": 1753809636,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/kimi-k2-instruct-0905",
"created": 1757018994,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-405b-instruct",
"created": 1721428386,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama4-scout-instruct-basic",
"created": 1743878279,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": true,
"context_length": 1048576
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b",
"created": 1745878133,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-70b-instruct",
"created": 1721287357,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1-0528",
"created": 1748456377,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/mixtral-8x22b-instruct",
"created": 1713375508,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 65536
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507",
"created": 1753808388,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
}
],
"is_streaming": false
}
}
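
The `/v1/models` recording above includes capability flags (`supports_chat`, `supports_tools`, `context_length`) alongside each model id. A small illustrative helper (not part of this diff) for picking the chat-capable entries out of such a recording:

```python
def chat_model_ids(recorded_models):
    """Return the ids of recorded models that advertise chat support."""
    return [
        m["__data__"]["id"]
        for m in recorded_models
        if m["__data__"].get("supports_chat")
    ]
```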

View file

@@ -0,0 +1,834 @@
{
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-0613",
"created": 1686588896,
"object": "model",
"owned_by": "openai"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4",
"created": 1687882411,
"object": "model",
"owned_by": "openai"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo",
"created": 1677610602,
"object": "model",
"owned_by": "openai"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-codex",
"created": 1757527818,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-audio-2025-08-28",
"created": 1756256146,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-realtime",
"created": 1756271701,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-realtime-2025-08-28",
"created": 1756271773,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-audio",
"created": 1756339249,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "davinci-002",
"created": 1692634301,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "babbage-002",
"created": 1692634615,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo-instruct",
"created": 1692901427,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo-instruct-0914",
"created": 1694122472,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "dall-e-3",
"created": 1698785189,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "dall-e-2",
"created": 1698798177,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-1106-preview",
"created": 1698957206,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo-1106",
"created": 1698959748,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "tts-1-hd",
"created": 1699046015,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "tts-1-1106",
"created": 1699053241,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "tts-1-hd-1106",
"created": 1699053533,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "text-embedding-3-small",
"created": 1705948997,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "text-embedding-3-large",
"created": 1705953180,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-0125-preview",
"created": 1706037612,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-turbo-preview",
"created": 1706037777,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo-0125",
"created": 1706048358,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-turbo",
"created": 1712361441,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-turbo-2024-04-09",
"created": 1712601677,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o",
"created": 1715367049,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-2024-05-13",
"created": 1715368132,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-2024-07-18",
"created": 1721172717,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini",
"created": 1721172741,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-2024-08-06",
"created": 1722814719,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "chatgpt-4o-latest",
"created": 1723515131,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1-mini-2024-09-12",
"created": 1725648979,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1-mini",
"created": 1725649008,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-realtime-preview-2024-10-01",
"created": 1727131766,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-audio-preview-2024-10-01",
"created": 1727389042,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-audio-preview",
"created": 1727460443,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-realtime-preview",
"created": 1727659998,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "omni-moderation-latest",
"created": 1731689265,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "omni-moderation-2024-09-26",
"created": 1732734466,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-realtime-preview-2024-12-17",
"created": 1733945430,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-audio-preview-2024-12-17",
"created": 1734034239,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-realtime-preview-2024-12-17",
"created": 1734112601,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-audio-preview-2024-12-17",
"created": 1734115920,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1-2024-12-17",
"created": 1734326976,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1",
"created": 1734375816,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-realtime-preview",
"created": 1734387380,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-audio-preview",
"created": 1734387424,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-mini",
"created": 1737146383,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-mini-2025-01-31",
"created": 1738010200,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-2024-11-20",
"created": 1739331543,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-search-preview-2025-03-11",
"created": 1741388170,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-search-preview",
"created": 1741388720,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-search-preview-2025-03-11",
"created": 1741390858,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-search-preview",
"created": 1741391161,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-transcribe",
"created": 1742068463,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-transcribe",
"created": 1742068596,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1-pro-2025-03-19",
"created": 1742251504,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1-pro",
"created": 1742251791,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-tts",
"created": 1742403959,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-2025-04-16",
"created": 1744133301,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o4-mini-2025-04-16",
"created": 1744133506,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3",
"created": 1744225308,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o4-mini",
"created": 1744225351,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1-2025-04-14",
"created": 1744315746,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1",
"created": 1744316542,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1-mini-2025-04-14",
"created": 1744317547,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1-mini",
"created": 1744318173,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1-nano-2025-04-14",
"created": 1744321025,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1-nano",
"created": 1744321707,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-image-1",
"created": 1745517030,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "codex-mini-latest",
"created": 1746673257,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-pro",
"created": 1748475349,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-realtime-preview-2025-06-03",
"created": 1748907838,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-audio-preview-2025-06-03",
"created": 1748908498,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-pro-2025-06-10",
"created": 1749166761,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o4-mini-deep-research",
"created": 1749685485,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-deep-research",
"created": 1749840121,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-deep-research-2025-06-26",
"created": 1750865219,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o4-mini-deep-research-2025-06-26",
"created": 1750866121,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-chat-latest",
"created": 1754073306,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-2025-08-07",
"created": 1754075360,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5",
"created": 1754425777,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-mini-2025-08-07",
"created": 1754425867,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-mini",
"created": 1754425928,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-nano-2025-08-07",
"created": 1754426303,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-nano",
"created": 1754426384,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo-16k",
"created": 1683758102,
"object": "model",
"owned_by": "openai-internal"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "tts-1",
"created": 1681940951,
"object": "model",
"owned_by": "openai-internal"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "whisper-1",
"created": 1677532384,
"object": "model",
"owned_by": "openai-internal"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "text-embedding-ada-002",
"created": 1671217299,
"object": "model",
"owned_by": "openai-internal"
}
}
],
"is_streaming": false
}
}

View file

@ -0,0 +1,96 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "nomic-embed-text:latest",
"created": 1756922046,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:l6-v2",
"created": 1756919946,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2-vision:11b",
"created": 1753926302,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2-vision:latest",
"created": 1753845527,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama-guard3:1b",
"created": 1753479584,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:1b",
"created": 1752814944,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:latest",
"created": 1748994610,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b",
"created": 1746123323,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b-instruct-fp16",
"created": 1746052428,
"object": "model",
"owned_by": "library"
}
}
],
"is_streaming": false
}
}

View file

@ -127,9 +127,8 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
name="fireworks",
description="Fireworks provider with a text model",
defaults={
"text_model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"vision_model": "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
"embedding_model": "nomic-ai/nomic-embed-text-v1.5",
"text_model": "fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct",
"embedding_model": "fireworks/accounts/fireworks/models/qwen3-embedding-8b",
},
),
}

View file

@ -32,8 +32,8 @@ def setup_telemetry_data(llama_stack_client, text_model_id):
)
for i in range(2):
llama_stack_client.inference.chat_completion(
model_id=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
llama_stack_client.chat.completions.create(
model=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
)
start_time = time.time()
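
The hunk above migrates the telemetry setup helper from the legacy inference.chat_completion(model_id=...) call to the OpenAI-compatible chat.completions.create(model=...) surface. A minimal sketch of the new call shape, assuming a locally running stack at http://localhost:8321 and a placeholder model id:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local endpoint
response = client.chat.completions.create(
    model="llama3.2:3b",  # placeholder; any registered text model id works
    messages=[{"role": "user", "content": "Test trace 0"}],
)
print(response.choices[0].message.content)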

View file

@ -83,12 +83,19 @@
],
"tools": [
{
"tool_name": "get_weather",
"description": "Get the current weather",
"parameters": {
"location": {
"param_type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": ["location"]
}
}
}
@ -116,12 +123,19 @@
],
"tools": [
{
"tool_name": "get_weather",
"description": "Get the current weather",
"parameters": {
"location": {
"param_type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": ["location"]
}
}
}
@ -162,12 +176,19 @@
],
"tools": [
{
"tool_name": "get_weather",
"description": "Get the current weather",
"parameters": {
"location": {
"param_type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": ["location"]
}
}
}
@ -192,66 +213,6 @@
]
}
},
"array_parameter": {
"data": {
"messages": [
[
{
"role": "user",
"content": "Please add a new product with name 'Widget', price 19.99, in stock, and tags ['new', 'sale'] and give me the product id."
}
]
],
"tools": [
{
"tool_name": "addProduct",
"description": "Get the current weather",
"parameters": {
"name": {
"param_type": "string",
"description": "Name of the product"
},
"price": {
"param_type": "number",
"description": "Price of the product"
},
"inStock": {
"param_type": "boolean",
"description": "Availability status of the product."
},
"tags": {
"param_type": "list[str]",
"description": "List of product tags"
}
}
}
],
"tool_responses": [
{
"response": "{'response': 'Successfully added product with id: 123'}"
}
],
"expected": [
{
"num_tool_calls": 1,
"tool_name": "addProduct",
"tool_arguments": {
"name": "Widget",
"price": 19.99,
"inStock": true,
"tags": [
"new",
"sale"
]
}
},
{
"num_tool_calls": 0,
"answer": "123"
}
]
}
},
"sample_messages_tool_calling": {
"data": {
"messages": [
@ -270,13 +231,19 @@
],
"tools": [
{
"tool_name": "get_weather",
"description": "Get the current weather",
"parameters": {
"location": {
"param_type": "string",
"description": "The city and state, e.g. San Francisco, CA",
"required": true
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": ["location"]
}
}
}
@ -343,18 +310,23 @@
],
"tools": [
{
"tool_name": "get_object_namespace_list",
"description": "Get the list of objects in a namespace",
"parameters": {
"kind": {
"param_type": "string",
"description": "the type of object",
"required": true
},
"namespace": {
"param_type": "string",
"description": "the name of the namespace",
"required": true
"type": "function",
"function": {
"name": "get_object_namespace_list",
"description": "Get the list of objects in a namespace",
"parameters": {
"type": "object",
"properties": {
"kind": {
"type": "string",
"description": "the type of object"
},
"namespace": {
"type": "string",
"description": "the name of the namespace"
}
},
"required": ["kind", "namespace"]
}
}
}
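
The test-case changes above replace the legacy flat tool parameter map with the standard OpenAI function-calling schema: each tool is wrapped in a {"type": "function", "function": {...}} object and its parameters form a JSON Schema object with "properties" and "required". A hedged sketch of passing such a tool to an OpenAI-compatible chat completion (the client and model id are placeholders):

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state (both required), e.g. San Francisco, CA.",
                    }
                },
                "required": ["location"],
            },
        },
    }
]
response = client.chat.completions.create(
    model="llama3.2:3b",  # placeholder model id
    messages=[{"role": "user", "content": "What's the weather in San Francisco, CA?"}],
    tools=tools,
)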

View file

@ -31,6 +31,11 @@ def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server):
uri = mcp_server["server_url"]
# registering should not raise an error anymore even if you don't specify the auth token
try:
llama_stack_client.toolgroups.unregister(toolgroup_id=test_toolgroup_id)
except Exception:
pass
llama_stack_client.toolgroups.register(
toolgroup_id=test_toolgroup_id,
provider_id="model-context-protocol",

View file

@ -107,14 +107,34 @@ async def test_get_raw_document_text_deprecated_text_yaml_with_text_content_item
assert "text/yaml" in str(w[0].message)

async def test_get_raw_document_text_supports_json_mime_type():
"""Test that the function accepts application/json mime type."""
json_content = '{"name": "test", "version": "1.0", "items": ["item1", "item2"]}'
document = Document(content=json_content, mime_type="application/json")
result = await get_raw_document_text(document)
assert result == json_content

async def test_get_raw_document_text_with_json_text_content_item():
"""Test that the function handles JSON TextContentItem correctly."""
json_content = '{"key": "value", "nested": {"array": [1, 2, 3]}}'
document = Document(content=TextContentItem(text=json_content), mime_type="application/json")
result = await get_raw_document_text(document)
assert result == json_content

async def test_get_raw_document_text_rejects_unsupported_mime_types():
"""Test that the function rejects unsupported mime types."""
document = Document(
content="Some content",
mime_type="application/json", # Not supported
mime_type="application/pdf", # Not supported
)
with pytest.raises(ValueError, match="Unexpected document mime type: application/json"):
with pytest.raises(ValueError, match="Unexpected document mime type: application/pdf"):
await get_raw_document_text(document)

View file

@ -16,9 +16,11 @@ from llama_stack.apis.agents import (
)
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.inference import Inference
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.safety import Safety
from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.tools import ListToolsResponse, Tool, ToolGroups, ToolParameter, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.providers.inline.agents.meta_reference.agent_instance import ChatAgent
from llama_stack.providers.inline.agents.meta_reference.agents import MetaReferenceAgentsImpl
from llama_stack.providers.inline.agents.meta_reference.config import MetaReferenceAgentsImplConfig
from llama_stack.providers.inline.agents.meta_reference.persistence import AgentInfo
@ -75,11 +77,11 @@ def sample_agent_config():
},
input_shields=["string"],
output_shields=["string"],
toolgroups=["string"],
toolgroups=["mcp::my_mcp_server"],
client_tools=[
{
"name": "string",
"description": "string",
"name": "client_tool",
"description": "Client Tool",
"parameters": [
{
"name": "string",
@ -226,3 +228,83 @@ async def test_delete_agent(agents_impl, sample_agent_config):
# Verify the agent was deleted
with pytest.raises(ValueError):
await agents_impl.get_agent(agent_id)

async def test__initialize_tools(agents_impl, sample_agent_config):
# Mock tool_groups_api.list_tools()
agents_impl.tool_groups_api.list_tools.return_value = ListToolsResponse(
data=[
Tool(
identifier="story_maker",
provider_id="model-context-protocol",
type=ResourceType.tool,
toolgroup_id="mcp::my_mcp_server",
description="Make a story",
parameters=[
ToolParameter(
name="story_title",
parameter_type="string",
description="Title of the story",
required=True,
title="Story Title",
),
ToolParameter(
name="input_words",
parameter_type="array",
description="Input words",
required=False,
items={"type": "string"},
title="Input Words",
default=[],
),
],
)
]
)
create_response = await agents_impl.create_agent(sample_agent_config)
agent_id = create_response.agent_id
# Get an instance of ChatAgent
chat_agent = await agents_impl._get_agent_impl(agent_id)
assert chat_agent is not None
assert isinstance(chat_agent, ChatAgent)
# Initialize tool definitions
await chat_agent._initialize_tools()
assert len(chat_agent.tool_defs) == 2
# Verify the first tool, which is a client tool
first_tool = chat_agent.tool_defs[0]
assert first_tool.tool_name == "client_tool"
assert first_tool.description == "Client Tool"
# Verify the second tool, which is an MCP tool that has an array-type property
second_tool = chat_agent.tool_defs[1]
assert second_tool.tool_name == "story_maker"
assert second_tool.description == "Make a story"
parameters = second_tool.parameters
assert len(parameters) == 2
# Verify a string property
story_title = parameters.get("story_title")
assert story_title is not None
assert story_title.param_type == "string"
assert story_title.description == "Title of the story"
assert story_title.required
assert story_title.items is None
assert story_title.title == "Story Title"
assert story_title.default is None
# Verify an array property
input_words = parameters.get("input_words")
assert input_words is not None
assert input_words.param_type == "array"
assert input_words.description == "Input words"
assert not input_words.required
assert input_words.items is not None
assert len(input_words.items) == 1
assert input_words.items.get("type") == "string"
assert input_words.title == "Input Words"
assert input_words.default == []

View file

@ -2,6 +2,4 @@
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .batch_inference import *
# the root directory of this source tree.

View file

@ -0,0 +1,147 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Unit tests for MCP tool parameter conversion in streaming responses.

These tests cover the fix for handling array-type parameters with an 'items' field
when converting MCP tool definitions to the OpenAI function-calling format.
"""
from llama_stack.apis.tools import ToolDef, ToolParameter
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool

def test_mcp_tool_conversion_with_array_items():
"""
Test that MCP tool parameters with array type and items field are properly converted.
This is a regression test for the bug where array parameters without 'items'
caused OpenAI API validation errors like:
"Invalid schema for function 'pods_exec': In context=('properties', 'command'),
array schema missing items."
"""
# Create a tool parameter with array type and items specification
# This mimics what kubernetes-mcp-server's pods_exec tool has
tool_param = ToolParameter(
name="command",
parameter_type="array",
description="Command to execute in the pod",
required=True,
items={"type": "string"}, # This is the crucial field
)
# Convert to ToolDefinition format (as done in streaming.py)
tool_def = ToolDefinition(
tool_name="test_tool",
description="Test tool with array parameter",
parameters={
"command": ToolParamDefinition(
param_type=tool_param.parameter_type,
description=tool_param.description,
required=tool_param.required,
default=tool_param.default,
items=tool_param.items, # The fix: ensure items is passed through
)
},
)
# Convert to OpenAI format
openai_tool = convert_tooldef_to_openai_tool(tool_def)
# Verify the conversion includes the items field
assert openai_tool["type"] == "function"
assert openai_tool["function"]["name"] == "test_tool"
assert "parameters" in openai_tool["function"]
parameters = openai_tool["function"]["parameters"]
assert "properties" in parameters
assert "command" in parameters["properties"]
command_param = parameters["properties"]["command"]
assert command_param["type"] == "array"
assert "items" in command_param, "Array parameter must have 'items' field for OpenAI API"
assert command_param["items"] == {"type": "string"}

def test_mcp_tool_conversion_without_array():
"""Test that non-array parameters work correctly without items field."""
tool_param = ToolParameter(
name="name",
parameter_type="string",
description="Name parameter",
required=True,
)
tool_def = ToolDefinition(
tool_name="test_tool",
description="Test tool with string parameter",
parameters={
"name": ToolParamDefinition(
param_type=tool_param.parameter_type,
description=tool_param.description,
required=tool_param.required,
items=tool_param.items, # Will be None for non-array types
)
},
)
openai_tool = convert_tooldef_to_openai_tool(tool_def)
# Verify basic structure
assert openai_tool["type"] == "function"
parameters = openai_tool["function"]["parameters"]
assert "name" in parameters["properties"]
name_param = parameters["properties"]["name"]
assert name_param["type"] == "string"
# items should not be present for non-array types
assert "items" not in name_param or name_param.get("items") is None

def test_mcp_tool_conversion_complex_array_items():
"""Test array parameter with complex items schema (object type)."""
tool_param = ToolParameter(
name="configs",
parameter_type="array",
description="Array of configuration objects",
required=False,
items={
"type": "object",
"properties": {
"key": {"type": "string"},
"value": {"type": "string"},
},
"required": ["key"],
},
)
tool_def = ToolDefinition(
tool_name="test_tool",
description="Test tool with complex array parameter",
parameters={
"configs": ToolParamDefinition(
param_type=tool_param.parameter_type,
description=tool_param.description,
required=tool_param.required,
items=tool_param.items,
)
},
)
openai_tool = convert_tooldef_to_openai_tool(tool_def)
# Verify complex items schema is preserved
parameters = openai_tool["function"]["parameters"]
configs_param = parameters["properties"]["configs"]
assert configs_param["type"] == "array"
assert "items" in configs_param
assert configs_param["items"]["type"] == "object"
assert "properties" in configs_param["items"]
assert "key" in configs_param["items"]["properties"]
assert "value" in configs_param["items"]["properties"]

View file

@ -4,11 +4,11 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from unittest.mock import MagicMock, PropertyMock, patch
from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch
import pytest
from llama_stack.apis.inference import Model
from llama_stack.apis.inference import Model, OpenAIUserMessageParam
from llama_stack.apis.models import ModelType
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
@ -43,8 +43,17 @@ class OpenAIMixinWithEmbeddingsImpl(OpenAIMixin):
@pytest.fixture
def mixin():
"""Create a test instance of OpenAIMixin"""
return OpenAIMixinImpl()
"""Create a test instance of OpenAIMixin with mocked model_store"""
mixin_instance = OpenAIMixinImpl()
# just enough to satisfy _get_provider_model_id calls
mock_model_store = MagicMock()
mock_model = MagicMock()
mock_model.provider_resource_id = "test-provider-resource-id"
mock_model_store.get_model = AsyncMock(return_value=mock_model)
mixin_instance.model_store = mock_model_store
return mixin_instance
@pytest.fixture
@ -205,6 +214,74 @@ class TestOpenAIMixinCacheBehavior:
assert "final-mock-model-id" in mixin._model_cache

class TestOpenAIMixinImagePreprocessing:
"""Test cases for image preprocessing functionality"""
async def test_openai_chat_completion_with_image_preprocessing_enabled(self, mixin):
"""Test that image URLs are converted to base64 when download_images is True"""
mixin.download_images = True
message = OpenAIUserMessageParam(
role="user",
content=[
{"type": "text", "text": "What's in this image?"},
{"type": "image_url", "image_url": {"url": "http://example.com/image.jpg"}},
],
)
mock_client = MagicMock()
mock_response = MagicMock()
mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
with patch.object(type(mixin), "client", new_callable=PropertyMock, return_value=mock_client):
with patch("llama_stack.providers.utils.inference.openai_mixin.localize_image_content") as mock_localize:
mock_localize.return_value = (b"fake_image_data", "jpeg")
await mixin.openai_chat_completion(model="test-model", messages=[message])
mock_localize.assert_called_once_with("http://example.com/image.jpg")
mock_client.chat.completions.create.assert_called_once()
call_args = mock_client.chat.completions.create.call_args
processed_messages = call_args[1]["messages"]
assert len(processed_messages) == 1
content = processed_messages[0]["content"]
assert len(content) == 2
assert content[0]["type"] == "text"
assert content[1]["type"] == "image_url"
assert content[1]["image_url"]["url"] == "data:image/jpeg;base64,ZmFrZV9pbWFnZV9kYXRh"
async def test_openai_chat_completion_with_image_preprocessing_disabled(self, mixin):
"""Test that image URLs are not modified when download_images is False"""
mixin.download_images = False # explicitly set to False
message = OpenAIUserMessageParam(
role="user",
content=[
{"type": "text", "text": "What's in this image?"},
{"type": "image_url", "image_url": {"url": "http://example.com/image.jpg"}},
],
)
mock_client = MagicMock()
mock_response = MagicMock()
mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
with patch.object(type(mixin), "client", new_callable=PropertyMock, return_value=mock_client):
with patch("llama_stack.providers.utils.inference.openai_mixin.localize_image_content") as mock_localize:
await mixin.openai_chat_completion(model="test-model", messages=[message])
mock_localize.assert_not_called()
mock_client.chat.completions.create.assert_called_once()
call_args = mock_client.chat.completions.create.call_args
processed_messages = call_args[1]["messages"]
assert len(processed_messages) == 1
content = processed_messages[0]["content"]
assert len(content) == 2
assert content[1]["image_url"]["url"] == "http://example.com/image.jpg"
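# For reference, the data URL asserted above follows the standard base64 data-URL
# construction; a sketch, assuming the mixin base64-encodes the downloaded bytes:
#   import base64
#   data_url = f"data:image/jpeg;base64,{base64.b64encode(b'fake_image_data').decode()}"
#   # -> "data:image/jpeg;base64,ZmFrZV9pbWFnZV9kYXRh"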

class TestOpenAIMixinEmbeddingModelMetadata:
"""Test cases for embedding_model_metadata attribute functionality"""