Merge b1cbfe99f9 into sapling-pr-archive-ehhuang

Author: ehhuang
Date: 2025-09-29 15:52:57 -07:00 (committed by GitHub)
Commit: 91898e6598

81 changed files with 51742 additions and 2402 deletions


@@ -43,7 +43,7 @@ jobs:
       # Cache oasdiff to avoid checksum failures and speed up builds
       - name: Cache oasdiff
         id: cache-oasdiff
-        uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830
         with:
           path: ~/oasdiff
           key: oasdiff-${{ runner.os }}


@@ -4,6 +4,8 @@ include llama_stack/models/llama/llama4/tokenizer.model
 include llama_stack/core/*.sh
 include llama_stack/cli/scripts/*.sh
 include llama_stack/distributions/*/*.yaml
-include llama_stack/providers/tests/test_cases/inference/*.json
+exclude llama_stack/distributions/ci-tests
+include tests/integration/test_cases/inference/*.json
 include llama_stack/models/llama/*/*.md
 include llama_stack/tests/integration/*.jpg
+prune llama_stack/distributions/ci-tests


@@ -139,18 +139,7 @@ Methods:
 - <code title="post /v1/agents/{agent_id}/session/{session_id}/turn">client.agents.turn.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/agents/turn.py">create</a>(session_id, \*, agent_id, \*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/agents/turn_create_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/agents/turn_create_response.py">TurnCreateResponse</a></code>
 - <code title="get /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}">client.agents.turn.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/agents/turn.py">retrieve</a>(turn_id, \*, agent_id, session_id) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/agents/turn.py">Turn</a></code>
 
-## BatchInference
-
-Types:
-
-```python
-from llama_stack_client.types import BatchInferenceChatCompletionResponse
-```
-
-Methods:
-
-- <code title="post /v1/batch-inference/chat-completion">client.batch_inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/batch_inference.py">chat_completion</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/batch_inference_chat_completion_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/batch_inference_chat_completion_response.py">BatchInferenceChatCompletionResponse</a></code>
-- <code title="post /v1/batch-inference/completion">client.batch_inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/batch_inference.py">completion</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/batch_inference_completion_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/shared/batch_completion.py">BatchCompletion</a></code>
-
 ## Datasets


@@ -548,7 +548,6 @@ class Generator:
         if op.defining_class.__name__ in [
             "SyntheticDataGeneration",
             "PostTraining",
-            "BatchInference",
         ]:
             op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)"
             print(op.defining_class.__name__)


@@ -87,94 +87,6 @@
         }
       }
     },
-    "/v1/inference/batch-chat-completion": {
-      "post": {
-        "responses": {
-          "200": {
-            "description": "A BatchChatCompletionResponse with the full completions.",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/BatchChatCompletionResponse"
-                }
-              }
-            }
-          },
-          "400": {
-            "$ref": "#/components/responses/BadRequest400"
-          },
-          "429": {
-            "$ref": "#/components/responses/TooManyRequests429"
-          },
-          "500": {
-            "$ref": "#/components/responses/InternalServerError500"
-          },
-          "default": {
-            "$ref": "#/components/responses/DefaultError"
-          }
-        },
-        "tags": [
-          "Inference"
-        ],
-        "summary": "Generate chat completions for a batch of messages using the specified model.",
-        "description": "Generate chat completions for a batch of messages using the specified model.",
-        "parameters": [],
-        "requestBody": {
-          "content": {
-            "application/json": {
-              "schema": {
-                "$ref": "#/components/schemas/BatchChatCompletionRequest"
-              }
-            }
-          },
-          "required": true
-        }
-      }
-    },
-    "/v1/inference/batch-completion": {
-      "post": {
-        "responses": {
-          "200": {
-            "description": "A BatchCompletionResponse with the full completions.",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/BatchCompletionResponse"
-                }
-              }
-            }
-          },
-          "400": {
-            "$ref": "#/components/responses/BadRequest400"
-          },
-          "429": {
-            "$ref": "#/components/responses/TooManyRequests429"
-          },
-          "500": {
-            "$ref": "#/components/responses/InternalServerError500"
-          },
-          "default": {
-            "$ref": "#/components/responses/DefaultError"
-          }
-        },
-        "tags": [
-          "Inference"
-        ],
-        "summary": "Generate completions for a batch of content using the specified model.",
-        "description": "Generate completions for a batch of content using the specified model.",
-        "parameters": [],
-        "requestBody": {
-          "content": {
-            "application/json": {
-              "schema": {
-                "$ref": "#/components/schemas/BatchCompletionRequest"
-              }
-            }
-          },
-          "required": true
-        }
-      }
-    },
     "/v1alpha/post-training/job/cancel": {
       "post": {
         "responses": {
@@ -281,7 +193,7 @@
           }
         },
         "tags": [
-          "BatchInference (Coming Soon)"
+          "Inference"
         ],
         "summary": "Generate a chat completion for the given messages using the specified model.",
         "description": "Generate a chat completion for the given messages using the specified model.",
@@ -298,55 +210,6 @@
         }
       }
     },
-    "/v1/inference/completion": {
-      "post": {
-        "responses": {
-          "200": {
-            "description": "If stream=False, returns a CompletionResponse with the full completion. If stream=True, returns an SSE event stream of CompletionResponseStreamChunk.",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/CompletionResponse"
-                }
-              },
-              "text/event-stream": {
-                "schema": {
-                  "$ref": "#/components/schemas/CompletionResponseStreamChunk"
-                }
-              }
-            }
-          },
-          "400": {
-            "$ref": "#/components/responses/BadRequest400"
-          },
-          "429": {
-            "$ref": "#/components/responses/TooManyRequests429"
-          },
-          "500": {
-            "$ref": "#/components/responses/InternalServerError500"
-          },
-          "default": {
-            "$ref": "#/components/responses/DefaultError"
-          }
-        },
-        "tags": [
-          "BatchInference (Coming Soon)"
-        ],
-        "summary": "Generate a completion for the given content using the specified model.",
-        "description": "Generate a completion for the given content using the specified model.",
-        "parameters": [],
-        "requestBody": {
-          "content": {
-            "application/json": {
-              "schema": {
-                "$ref": "#/components/schemas/CompletionRequest"
-              }
-            }
-          },
-          "required": true
-        }
-      }
-    },
     "/v1/agents": {
       "get": {
         "responses": {
@@ -6346,6 +6209,20 @@
       ],
       "title": "AppendRowsRequest"
     },
+    "CancelTrainingJobRequest": {
+      "type": "object",
+      "properties": {
+        "job_uuid": {
+          "type": "string",
+          "description": "The UUID of the job to cancel."
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "job_uuid"
+      ],
+      "title": "CancelTrainingJobRequest"
+    },
     "CompletionMessage": {
       "type": "object",
       "properties": {
@@ -6906,6 +6783,31 @@
           "type": "boolean",
           "default": true
         },
+        "items": {
+          "oneOf": [
+            {
+              "type": "null"
+            },
+            {
+              "type": "boolean"
+            },
+            {
+              "type": "number"
+            },
+            {
+              "type": "string"
+            },
+            {
+              "type": "array"
+            },
+            {
+              "type": "object"
+            }
+          ]
+        },
+        "title": {
+          "type": "string"
+        },
         "default": {
           "oneOf": [
             {
@@ -7051,26 +6953,23 @@
       "title": "UserMessage",
       "description": "A message from the user in a chat conversation."
     },
-    "BatchChatCompletionRequest": {
+    "ChatCompletionRequest": {
       "type": "object",
       "properties": {
         "model_id": {
           "type": "string",
           "description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
         },
-        "messages_batch": {
-          "type": "array",
-          "items": {
-            "type": "array",
-            "items": {
-              "$ref": "#/components/schemas/Message"
-            }
-          },
-          "description": "The messages to generate completions for."
-        },
+        "messages": {
+          "type": "array",
+          "items": {
+            "$ref": "#/components/schemas/Message"
+          },
+          "description": "List of messages in the conversation."
+        },
         "sampling_params": {
           "$ref": "#/components/schemas/SamplingParams",
-          "description": "(Optional) Parameters to control the sampling strategy."
+          "description": "Parameters to control the sampling strategy."
         },
         "tools": {
           "type": "array",
@@ -7079,13 +6978,31 @@
           },
           "description": "(Optional) List of tool definitions available to the model."
         },
-        "tool_config": {
-          "$ref": "#/components/schemas/ToolConfig",
-          "description": "(Optional) Configuration for tool use."
+        "tool_choice": {
+          "type": "string",
+          "enum": [
+            "auto",
+            "required",
+            "none"
+          ],
+          "description": "(Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead."
+        },
+        "tool_prompt_format": {
+          "type": "string",
+          "enum": [
+            "json",
+            "function_tag",
+            "python_list"
+          ],
+          "description": "(Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead."
         },
         "response_format": {
           "$ref": "#/components/schemas/ResponseFormat",
-          "description": "(Optional) Grammar specification for guided (structured) decoding."
+          "description": "(Optional) Grammar specification for guided (structured) decoding. There are two options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it."
         },
+        "stream": {
+          "type": "boolean",
+          "description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
+        },
         "logprobs": {
           "type": "object",
@@ -7098,32 +7015,18 @@
           },
           "additionalProperties": false,
           "description": "(Optional) If specified, log probabilities for each token position will be returned."
+        },
+        "tool_config": {
+          "$ref": "#/components/schemas/ToolConfig",
+          "description": "(Optional) Configuration for tool use."
         }
       },
       "additionalProperties": false,
       "required": [
         "model_id",
-        "messages_batch"
+        "messages"
       ],
-      "title": "BatchChatCompletionRequest"
-    },
-    "BatchChatCompletionResponse": {
-      "type": "object",
-      "properties": {
-        "batch": {
-          "type": "array",
-          "items": {
-            "$ref": "#/components/schemas/ChatCompletionResponse"
-          },
-          "description": "List of chat completion responses, one for each conversation in the batch"
-        }
-      },
-      "additionalProperties": false,
-      "required": [
-        "batch"
-      ],
-      "title": "BatchChatCompletionResponse",
-      "description": "Response from a batch chat completion request."
+      "title": "ChatCompletionRequest"
     },
     "ChatCompletionResponse": {
       "type": "object",
@@ -7203,194 +7106,6 @@
       "title": "TokenLogProbs",
       "description": "Log probabilities for generated tokens."
     },
-    "BatchCompletionRequest": {
-      "type": "object",
-      "properties": {
-        "model_id": {
-          "type": "string",
-          "description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
-        },
-        "content_batch": {
-          "type": "array",
-          "items": {
-            "$ref": "#/components/schemas/InterleavedContent"
-          },
-          "description": "The content to generate completions for."
-        },
-        "sampling_params": {
-          "$ref": "#/components/schemas/SamplingParams",
-          "description": "(Optional) Parameters to control the sampling strategy."
-        },
-        "response_format": {
-          "$ref": "#/components/schemas/ResponseFormat",
-          "description": "(Optional) Grammar specification for guided (structured) decoding."
-        },
-        "logprobs": {
-          "type": "object",
-          "properties": {
-            "top_k": {
-              "type": "integer",
-              "default": 0,
-              "description": "How many tokens (for each position) to return log probabilities for."
-            }
-          },
-          "additionalProperties": false,
-          "description": "(Optional) If specified, log probabilities for each token position will be returned."
-        }
-      },
-      "additionalProperties": false,
-      "required": [
-        "model_id",
-        "content_batch"
-      ],
-      "title": "BatchCompletionRequest"
-    },
-    "BatchCompletionResponse": {
-      "type": "object",
-      "properties": {
-        "batch": {
-          "type": "array",
-          "items": {
-            "$ref": "#/components/schemas/CompletionResponse"
-          },
-          "description": "List of completion responses, one for each input in the batch"
-        }
-      },
-      "additionalProperties": false,
-      "required": [
-        "batch"
-      ],
-      "title": "BatchCompletionResponse",
-      "description": "Response from a batch completion request."
-    },
-    "CompletionResponse": {
-      "type": "object",
-      "properties": {
-        "metrics": {
-          "type": "array",
-          "items": {
-            "$ref": "#/components/schemas/MetricInResponse"
-          },
-          "description": "(Optional) List of metrics associated with the API response"
-        },
-        "content": {
-          "type": "string",
-          "description": "The generated completion text"
-        },
-        "stop_reason": {
-          "type": "string",
-          "enum": [
-            "end_of_turn",
-            "end_of_message",
-            "out_of_tokens"
-          ],
-          "description": "Reason why generation stopped"
-        },
-        "logprobs": {
-          "type": "array",
-          "items": {
-            "$ref": "#/components/schemas/TokenLogProbs"
-          },
-          "description": "Optional log probabilities for generated tokens"
-        }
-      },
-      "additionalProperties": false,
-      "required": [
-        "content",
-        "stop_reason"
-      ],
-      "title": "CompletionResponse",
-      "description": "Response from a completion request."
-    },
-    "CancelTrainingJobRequest": {
-      "type": "object",
-      "properties": {
-        "job_uuid": {
-          "type": "string",
-          "description": "The UUID of the job to cancel."
-        }
-      },
-      "additionalProperties": false,
-      "required": [
-        "job_uuid"
-      ],
-      "title": "CancelTrainingJobRequest"
-    },
-    "ChatCompletionRequest": {
-      "type": "object",
-      "properties": {
-        "model_id": {
-          "type": "string",
-          "description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
-        },
-        "messages": {
-          "type": "array",
-          "items": {
-            "$ref": "#/components/schemas/Message"
-          },
-          "description": "List of messages in the conversation."
-        },
-        "sampling_params": {
-          "$ref": "#/components/schemas/SamplingParams",
-          "description": "Parameters to control the sampling strategy."
-        },
-        "tools": {
-          "type": "array",
-          "items": {
-            "$ref": "#/components/schemas/ToolDefinition"
-          },
-          "description": "(Optional) List of tool definitions available to the model."
-        },
-        "tool_choice": {
-          "type": "string",
-          "enum": [
-            "auto",
-            "required",
-            "none"
-          ],
-          "description": "(Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead."
-        },
-        "tool_prompt_format": {
-          "type": "string",
-          "enum": [
-            "json",
-            "function_tag",
-            "python_list"
-          ],
-          "description": "(Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead."
-        },
-        "response_format": {
-          "$ref": "#/components/schemas/ResponseFormat",
-          "description": "(Optional) Grammar specification for guided (structured) decoding. There are two options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it."
-        },
-        "stream": {
-          "type": "boolean",
-          "description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
-        },
-        "logprobs": {
-          "type": "object",
-          "properties": {
-            "top_k": {
-              "type": "integer",
-              "default": 0,
-              "description": "How many tokens (for each position) to return log probabilities for."
-            }
-          },
-          "additionalProperties": false,
-          "description": "(Optional) If specified, log probabilities for each token position will be returned."
-        },
-        "tool_config": {
-          "$ref": "#/components/schemas/ToolConfig",
-          "description": "(Optional) Configuration for tool use."
-        }
-      },
-      "additionalProperties": false,
-      "required": [
-        "model_id",
-        "messages"
-      ],
-      "title": "ChatCompletionRequest"
-    },
     "ChatCompletionResponseEvent": {
       "type": "object",
       "properties": {
@@ -7560,87 +7275,6 @@
       "title": "ToolCallDelta",
       "description": "A tool call content delta for streaming responses."
     },
-    "CompletionRequest": {
-      "type": "object",
-      "properties": {
-        "model_id": {
-          "type": "string",
-          "description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
-        },
-        "content": {
-          "$ref": "#/components/schemas/InterleavedContent",
-          "description": "The content to generate a completion for."
-        },
-        "sampling_params": {
-          "$ref": "#/components/schemas/SamplingParams",
-          "description": "(Optional) Parameters to control the sampling strategy."
-        },
-        "response_format": {
-          "$ref": "#/components/schemas/ResponseFormat",
-          "description": "(Optional) Grammar specification for guided (structured) decoding."
-        },
-        "stream": {
-          "type": "boolean",
-          "description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
-        },
-        "logprobs": {
-          "type": "object",
-          "properties": {
-            "top_k": {
-              "type": "integer",
-              "default": 0,
-              "description": "How many tokens (for each position) to return log probabilities for."
-            }
-          },
-          "additionalProperties": false,
-          "description": "(Optional) If specified, log probabilities for each token position will be returned."
-        }
-      },
-      "additionalProperties": false,
-      "required": [
-        "model_id",
-        "content"
-      ],
-      "title": "CompletionRequest"
-    },
-    "CompletionResponseStreamChunk": {
-      "type": "object",
-      "properties": {
-        "metrics": {
-          "type": "array",
-          "items": {
-            "$ref": "#/components/schemas/MetricInResponse"
-          },
-          "description": "(Optional) List of metrics associated with the API response"
-        },
-        "delta": {
-          "type": "string",
-          "description": "New content generated since last chunk. This can be one or more tokens."
-        },
-        "stop_reason": {
-          "type": "string",
-          "enum": [
-            "end_of_turn",
-            "end_of_message",
-            "out_of_tokens"
-          ],
-          "description": "Optional reason why generation stopped, if complete"
-        },
-        "logprobs": {
-          "type": "array",
-          "items": {
-            "$ref": "#/components/schemas/TokenLogProbs"
-          },
-          "description": "Optional log probabilities for generated tokens"
-        }
-      },
-      "additionalProperties": false,
-      "required": [
-        "delta"
-      ],
-      "title": "CompletionResponseStreamChunk",
-      "description": "A chunk of a streamed completion response."
-    },
     "AgentConfig": {
       "type": "object",
       "properties": {
@@ -7848,6 +7482,14 @@
           "default": true,
           "description": "Whether this parameter is required for tool invocation"
         },
+        "items": {
+          "type": "object",
+          "description": "Type of the elements when parameter_type is array"
+        },
+        "title": {
+          "type": "string",
+          "description": "(Optional) Title of the parameter"
+        },
         "default": {
           "oneOf": [
             {
@@ -18779,11 +18421,6 @@
       "description": "Main functionalities provided by this API:\n- Create agents with specific instructions and ability to use tools.\n- Interactions with agents are grouped into sessions (\"threads\"), and each interaction is called a \"turn\".\n- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).\n- Agents can be provided with various shields (see the Safety API for more details).\n- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.",
       "x-displayName": "Agents API for creating and interacting with agentic systems."
     },
-    {
-      "name": "BatchInference (Coming Soon)",
-      "description": "This is an asynchronous API. If the request is successful, the response will be a job which can be polled for completion.\n\nNOTE: This API is not yet implemented and is subject to change in concert with other asynchronous APIs\nincluding (post-training, evals, etc).",
-      "x-displayName": "Batch inference API for generating completions and chat completions."
-    },
     {
       "name": "Benchmarks"
     },
@@ -18858,7 +18495,6 @@
       "name": "Operations",
       "tags": [
         "Agents",
-        "BatchInference (Coming Soon)",
         "Benchmarks",
        "DatasetIO",
        "Datasets",


@@ -43,72 +43,6 @@ paths:
         schema:
           $ref: '#/components/schemas/AppendRowsRequest'
       required: true
-  /v1/inference/batch-chat-completion:
-    post:
-      responses:
-        '200':
-          description: >-
-            A BatchChatCompletionResponse with the full completions.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchChatCompletionResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Inference
-      summary: >-
-        Generate chat completions for a batch of messages using the specified model.
-      description: >-
-        Generate chat completions for a batch of messages using the specified model.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/BatchChatCompletionRequest'
-        required: true
-  /v1/inference/batch-completion:
-    post:
-      responses:
-        '200':
-          description: >-
-            A BatchCompletionResponse with the full completions.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchCompletionResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Inference
-      summary: >-
-        Generate completions for a batch of content using the specified model.
-      description: >-
-        Generate completions for a batch of content using the specified model.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/BatchCompletionRequest'
-        required: true
   /v1alpha/post-training/job/cancel:
     post:
       responses:
@@ -186,7 +120,7 @@ paths:
         default:
           $ref: '#/components/responses/DefaultError'
       tags:
-        - BatchInference (Coming Soon)
+        - Inference
       summary: >-
         Generate a chat completion for the given messages using the specified model.
       description: >-
@@ -198,43 +132,6 @@ paths:
         schema:
           $ref: '#/components/schemas/ChatCompletionRequest'
       required: true
-  /v1/inference/completion:
-    post:
-      responses:
-        '200':
-          description: >-
-            If stream=False, returns a CompletionResponse with the full completion.
-            If stream=True, returns an SSE event stream of CompletionResponseStreamChunk.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/CompletionResponse'
-            text/event-stream:
-              schema:
-                $ref: '#/components/schemas/CompletionResponseStreamChunk'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - BatchInference (Coming Soon)
-      summary: >-
-        Generate a completion for the given content using the specified model.
-      description: >-
-        Generate a completion for the given content using the specified model.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/CompletionRequest'
-        required: true
   /v1/agents:
     get:
       responses:
@@ -4559,6 +4456,16 @@ components:
       required:
         - rows
       title: AppendRowsRequest
+    CancelTrainingJobRequest:
+      type: object
+      properties:
+        job_uuid:
+          type: string
+          description: The UUID of the job to cancel.
+      additionalProperties: false
+      required:
+        - job_uuid
+      title: CancelTrainingJobRequest
     CompletionMessage:
       type: object
       properties:
@@ -4959,6 +4866,16 @@ components:
         required:
           type: boolean
           default: true
+        items:
+          oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+        title:
+          type: string
         default:
           oneOf:
             - type: 'null'
@@ -5076,224 +4993,6 @@ components:
       title: UserMessage
       description: >-
         A message from the user in a chat conversation.
-    BatchChatCompletionRequest:
-      type: object
-      properties:
-        model_id:
-          type: string
-          description: >-
-            The identifier of the model to use. The model must be registered with
-            Llama Stack and available via the /models endpoint.
-        messages_batch:
-          type: array
-          items:
-            type: array
-            items:
-              $ref: '#/components/schemas/Message'
-          description: >-
-            The messages to generate completions for.
-        sampling_params:
-          $ref: '#/components/schemas/SamplingParams'
-          description: >-
-            (Optional) Parameters to control the sampling strategy.
-        tools:
-          type: array
-          items:
-            $ref: '#/components/schemas/ToolDefinition'
-          description: >-
-            (Optional) List of tool definitions available to the model.
-        tool_config:
-          $ref: '#/components/schemas/ToolConfig'
-          description: (Optional) Configuration for tool use.
-        response_format:
-          $ref: '#/components/schemas/ResponseFormat'
-          description: >-
-            (Optional) Grammar specification for guided (structured) decoding.
-        logprobs:
-          type: object
-          properties:
-            top_k:
-              type: integer
-              default: 0
-              description: >-
-                How many tokens (for each position) to return log probabilities for.
-          additionalProperties: false
-          description: >-
-            (Optional) If specified, log probabilities for each token position will
-            be returned.
-      additionalProperties: false
-      required:
-        - model_id
-        - messages_batch
-      title: BatchChatCompletionRequest
-    BatchChatCompletionResponse:
-      type: object
-      properties:
-        batch:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatCompletionResponse'
-          description: >-
-            List of chat completion responses, one for each conversation in the batch
-      additionalProperties: false
-      required:
-        - batch
-      title: BatchChatCompletionResponse
-      description: >-
-        Response from a batch chat completion request.
-    ChatCompletionResponse:
-      type: object
-      properties:
-        metrics:
-          type: array
-          items:
-            $ref: '#/components/schemas/MetricInResponse'
-          description: >-
-            (Optional) List of metrics associated with the API response
-        completion_message:
-          $ref: '#/components/schemas/CompletionMessage'
-          description: The complete response message
-        logprobs:
-          type: array
-          items:
-            $ref: '#/components/schemas/TokenLogProbs'
-          description: >-
-            Optional log probabilities for generated tokens
-      additionalProperties: false
-      required:
-        - completion_message
-      title: ChatCompletionResponse
-      description: Response from a chat completion request.
-    MetricInResponse:
-      type: object
-      properties:
-        metric:
-          type: string
-          description: The name of the metric
-        value:
-          oneOf:
-            - type: integer
-            - type: number
-          description: The numeric value of the metric
-        unit:
-          type: string
-          description: >-
-            (Optional) The unit of measurement for the metric value
-      additionalProperties: false
-      required:
-        - metric
-        - value
-      title: MetricInResponse
-      description: >-
-        A metric value included in API responses.
-    TokenLogProbs:
-      type: object
-      properties:
-        logprobs_by_token:
-          type: object
-          additionalProperties:
-            type: number
-          description: >-
-            Dictionary mapping tokens to their log probabilities
-      additionalProperties: false
-      required:
-        - logprobs_by_token
-      title: TokenLogProbs
-      description: Log probabilities for generated tokens.
-    BatchCompletionRequest:
-      type: object
-      properties:
-        model_id:
-          type: string
-          description: >-
-            The identifier of the model to use. The model must be registered with
-            Llama Stack and available via the /models endpoint.
-        content_batch:
-          type: array
-          items:
-            $ref: '#/components/schemas/InterleavedContent'
-          description: The content to generate completions for.
-        sampling_params:
-          $ref: '#/components/schemas/SamplingParams'
-          description: >-
-            (Optional) Parameters to control the sampling strategy.
-        response_format:
-          $ref: '#/components/schemas/ResponseFormat'
-          description: >-
-            (Optional) Grammar specification for guided (structured) decoding.
-        logprobs:
-          type: object
-          properties:
-            top_k:
-              type: integer
-              default: 0
-              description: >-
-                How many tokens (for each position) to return log probabilities for.
-          additionalProperties: false
-          description: >-
-            (Optional) If specified, log probabilities for each token position will
-            be returned.
-      additionalProperties: false
-      required:
-        - model_id
-        - content_batch
-      title: BatchCompletionRequest
-    BatchCompletionResponse:
-      type: object
-      properties:
-        batch:
-          type: array
-          items:
-            $ref: '#/components/schemas/CompletionResponse'
-          description: >-
-            List of completion responses, one for each input in the batch
-      additionalProperties: false
-      required:
-        - batch
-      title: BatchCompletionResponse
-      description: >-
-        Response from a batch completion request.
-    CompletionResponse:
-      type: object
-      properties:
-        metrics:
-          type: array
-          items:
-            $ref: '#/components/schemas/MetricInResponse'
-          description: >-
-            (Optional) List of metrics associated with the API response
-        content:
-          type: string
-          description: The generated completion text
-        stop_reason:
-          type: string
-          enum:
-            - end_of_turn
-            - end_of_message
-            - out_of_tokens
-          description: Reason why generation stopped
-        logprobs:
-          type: array
-          items:
-            $ref: '#/components/schemas/TokenLogProbs'
-          description: >-
-            Optional log probabilities for generated tokens
-      additionalProperties: false
-      required:
-        - content
-        - stop_reason
-      title: CompletionResponse
-      description: Response from a completion request.
-    CancelTrainingJobRequest:
-      type: object
-      properties:
-        job_uuid:
-          type: string
-          description: The UUID of the job to cancel.
-      additionalProperties: false
-      required:
-        - job_uuid
-      title: CancelTrainingJobRequest
     ChatCompletionRequest:
       type: object
       properties:
@@ -5372,6 +5071,65 @@ components:
         - model_id
         - messages
       title: ChatCompletionRequest
+    ChatCompletionResponse:
+      type: object
+      properties:
+        metrics:
+          type: array
+          items:
+            $ref: '#/components/schemas/MetricInResponse'
+          description: >-
+            (Optional) List of metrics associated with the API response
+        completion_message:
+          $ref: '#/components/schemas/CompletionMessage'
+          description: The complete response message
+        logprobs:
+          type: array
+          items:
+            $ref: '#/components/schemas/TokenLogProbs'
+          description: >-
+            Optional log probabilities for generated tokens
+      additionalProperties: false
+      required:
+        - completion_message
+      title: ChatCompletionResponse
+      description: Response from a chat completion request.
+    MetricInResponse:
+      type: object
+      properties:
+        metric:
+          type: string
+          description: The name of the metric
+        value:
+          oneOf:
+            - type: integer
+            - type: number
+          description: The numeric value of the metric
+        unit:
+          type: string
+          description: >-
+            (Optional) The unit of measurement for the metric value
+      additionalProperties: false
+      required:
+        - metric
+        - value
+      title: MetricInResponse
+      description: >-
+        A metric value included in API responses.
+    TokenLogProbs:
+      type: object
+      properties:
+        logprobs_by_token:
+          type: object
+          additionalProperties:
+            type: number
+          description: >-
+            Dictionary mapping tokens to their log probabilities
+      additionalProperties: false
+      required:
+        - logprobs_by_token
+      title: TokenLogProbs
+      description: Log probabilities for generated tokens.
     ChatCompletionResponseEvent:
       type: object
       properties:
@@ -5507,81 +5265,6 @@ components:
       title: ToolCallDelta
       description: >-
         A tool call content delta for streaming responses.
-    CompletionRequest:
-      type: object
-      properties:
-        model_id:
-          type: string
-          description: >-
-            The identifier of the model to use. The model must be registered with
-            Llama Stack and available via the /models endpoint.
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            The content to generate a completion for.
-        sampling_params:
-          $ref: '#/components/schemas/SamplingParams'
-          description: >-
-            (Optional) Parameters to control the sampling strategy.
-        response_format:
-          $ref: '#/components/schemas/ResponseFormat'
-          description: >-
-            (Optional) Grammar specification for guided (structured) decoding.
-        stream:
-          type: boolean
-          description: >-
-            (Optional) If True, generate an SSE event stream of the response. Defaults
-            to False.
-        logprobs:
-          type: object
-          properties:
-            top_k:
-              type: integer
-              default: 0
-              description: >-
-                How many tokens (for each position) to return log probabilities for.
-          additionalProperties: false
-          description: >-
-            (Optional) If specified, log probabilities for each token position will
-            be returned.
-      additionalProperties: false
-      required:
-        - model_id
-        - content
-      title: CompletionRequest
-    CompletionResponseStreamChunk:
-      type: object
-      properties:
-        metrics:
-          type: array
-          items:
-            $ref: '#/components/schemas/MetricInResponse'
-          description: >-
-            (Optional) List of metrics associated with the API response
-        delta:
-          type: string
-          description: >-
-            New content generated since last chunk. This can be one or more tokens.
-        stop_reason:
-          type: string
-          enum:
-            - end_of_turn
-            - end_of_message
-            - out_of_tokens
-          description: >-
-            Optional reason why generation stopped, if complete
-        logprobs:
-          type: array
-          items:
-            $ref: '#/components/schemas/TokenLogProbs'
-          description: >-
-            Optional log probabilities for generated tokens
-      additionalProperties: false
-      required:
-        - delta
-      title: CompletionResponseStreamChunk
-      description: >-
-        A chunk of a streamed completion response.
     AgentConfig:
       type: object
       properties:
@@ -5730,6 +5413,13 @@ components:
           default: true
           description: >-
             Whether this parameter is required for tool invocation
+        items:
+          type: object
+          description: >-
+            Type of the elements when parameter_type is array
+        title:
+          type: string
+          description: (Optional) Title of the parameter
         default:
           oneOf:
             - type: 'null'
@@ -13983,18 +13673,6 @@ tags:
       the RAG Tool and Vector IO APIs for more details.
     x-displayName: >-
       Agents API for creating and interacting with agentic systems.
-  - name: BatchInference (Coming Soon)
-    description: >-
-      This is an asynchronous API. If the request is successful, the response will
-      be a job which can be polled for completion.
-
-      NOTE: This API is not yet implemented and is subject to change in concert with
-      other asynchronous APIs
-      including (post-training, evals, etc).
-    x-displayName: >-
-      Batch inference API for generating completions and chat completions.
   - name: Benchmarks
   - name: DatasetIO
   - name: Datasets
@@ -14037,7 +13715,6 @@ x-tagGroups:
   - name: Operations
     tags:
       - Agents
-      - BatchInference (Coming Soon)
      - Benchmarks
      - DatasetIO
      - Datasets


@@ -1,79 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import Protocol, runtime_checkable
-
-from llama_stack.apis.common.job_types import Job
-from llama_stack.apis.inference import (
-    InterleavedContent,
-    LogProbConfig,
-    Message,
-    ResponseFormat,
-    SamplingParams,
-    ToolChoice,
-    ToolDefinition,
-    ToolPromptFormat,
-)
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import webmethod
-
-
-@runtime_checkable
-class BatchInference(Protocol):
-    """Batch inference API for generating completions and chat completions.
-
-    This is an asynchronous API. If the request is successful, the response will be a job which can be polled for completion.
-
-    NOTE: This API is not yet implemented and is subject to change in concert with other asynchronous APIs
-    including (post-training, evals, etc).
-    """
-
-    @webmethod(route="/batch-inference/completion", method="POST", level=LLAMA_STACK_API_V1)
-    async def completion(
-        self,
-        model: str,
-        content_batch: list[InterleavedContent],
-        sampling_params: SamplingParams | None = None,
-        response_format: ResponseFormat | None = None,
-        logprobs: LogProbConfig | None = None,
-    ) -> Job:
-        """Generate completions for a batch of content.
-
-        :param model: The model to use for the completion.
-        :param content_batch: The content to complete.
-        :param sampling_params: The sampling parameters to use for the completion.
-        :param response_format: The response format to use for the completion.
-        :param logprobs: The logprobs to use for the completion.
-        :returns: A job for the completion.
-        """
-        ...
-
-    @webmethod(route="/batch-inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
-    async def chat_completion(
-        self,
-        model: str,
-        messages_batch: list[list[Message]],
-        sampling_params: SamplingParams | None = None,
-        # zero-shot tool definitions as input to the model
-        tools: list[ToolDefinition] | None = None,
-        tool_choice: ToolChoice | None = ToolChoice.auto,
-        tool_prompt_format: ToolPromptFormat | None = None,
-        response_format: ResponseFormat | None = None,
-        logprobs: LogProbConfig | None = None,
-    ) -> Job:
-        """Generate chat completions for a batch of messages.
-
-        :param model: The model to use for the chat completion.
-        :param messages_batch: The messages to complete.
-        :param sampling_params: The sampling parameters to use for the completion.
-        :param tools: The tools to use for the chat completion.
-        :param tool_choice: The tool choice to use for the chat completion.
-        :param tool_prompt_format: The tool prompt format to use for the chat completion.
-        :param response_format: The response format to use for the chat completion.
-        :param logprobs: The logprobs to use for the chat completion.
-        :returns: A job for the chat completion.
-        """
-        ...
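
Since the protocol above is deleted outright, a caller that relied on it has to issue ordinary per-conversation requests instead. One way to recover batch-like behavior client-side is a small fan-out helper; this is a hedged sketch, where `inference` stands for any object implementing the `InferenceProvider` protocol and the helper name is made up:

```python
# Hypothetical replacement helper: fan a batch out over the regular
# chat_completion method and gather the results concurrently.
import asyncio

async def chat_completion_batch(inference, model_id: str, messages_batch):
    tasks = [
        inference.chat_completion(model_id=model_id, messages=messages)
        for messages in messages_batch
    ]
    # One ChatCompletionResponse per conversation, in input order.
    return await asyncio.gather(*tasks)
```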


@@ -975,26 +975,6 @@ class EmbeddingTaskType(Enum):
     document = "document"
 
-
-@json_schema_type
-class BatchCompletionResponse(BaseModel):
-    """Response from a batch completion request.
-
-    :param batch: List of completion responses, one for each input in the batch
-    """
-
-    batch: list[CompletionResponse]
-
-
-@json_schema_type
-class BatchChatCompletionResponse(BaseModel):
-    """Response from a batch chat completion request.
-
-    :param batch: List of chat completion responses, one for each conversation in the batch
-    """
-
-    batch: list[ChatCompletionResponse]
-
 
 class OpenAICompletionWithInputMessages(OpenAIChatCompletion):
     input_messages: list[OpenAIMessageParam]
@@ -1028,7 +1008,6 @@ class InferenceProvider(Protocol):
     model_store: ModelStore | None = None
 
-    @webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1)
     async def completion(
         self,
         model_id: str,
@@ -1051,27 +1030,6 @@ class InferenceProvider(Protocol):
         """
         ...
 
-    @webmethod(route="/inference/batch-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
-    async def batch_completion(
-        self,
-        model_id: str,
-        content_batch: list[InterleavedContent],
-        sampling_params: SamplingParams | None = None,
-        response_format: ResponseFormat | None = None,
-        logprobs: LogProbConfig | None = None,
-    ) -> BatchCompletionResponse:
-        """Generate completions for a batch of content using the specified model.
-
-        :param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
-        :param content_batch: The content to generate completions for.
-        :param sampling_params: (Optional) Parameters to control the sampling strategy.
-        :param response_format: (Optional) Grammar specification for guided (structured) decoding.
-        :param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
-        :returns: A BatchCompletionResponse with the full completions.
-        """
-        raise NotImplementedError("Batch completion is not implemented")
-        return  # this is so mypy's safe-super rule will consider the method concrete
-
     @webmethod(route="/inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
     async def chat_completion(
         self,
@@ -1112,31 +1070,6 @@ class InferenceProvider(Protocol):
         """
         ...
 
-    @webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
-    async def batch_chat_completion(
-        self,
-        model_id: str,
-        messages_batch: list[list[Message]],
-        sampling_params: SamplingParams | None = None,
-        tools: list[ToolDefinition] | None = None,
-        tool_config: ToolConfig | None = None,
-        response_format: ResponseFormat | None = None,
-        logprobs: LogProbConfig | None = None,
-    ) -> BatchChatCompletionResponse:
-        """Generate chat completions for a batch of messages using the specified model.
-
-        :param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
-        :param messages_batch: The messages to generate completions for.
-        :param sampling_params: (Optional) Parameters to control the sampling strategy.
-        :param tools: (Optional) List of tool definitions available to the model.
-        :param tool_config: (Optional) Configuration for tool use.
-        :param response_format: (Optional) Grammar specification for guided (structured) decoding.
-        :param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
-        :returns: A BatchChatCompletionResponse with the full completions.
-        """
-        raise NotImplementedError("Batch chat completion is not implemented")
-        return  # this is so mypy's safe-super rule will consider the method concrete
-
     @webmethod(route="/inference/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def embeddings(
         self,


@@ -27,6 +27,8 @@ class ToolParameter(BaseModel):
     :param parameter_type: Type of the parameter (e.g., string, integer)
     :param description: Human-readable description of what the parameter does
     :param required: Whether this parameter is required for tool invocation
+    :param items: Type of the elements when parameter_type is array
+    :param title: (Optional) Title of the parameter
     :param default: (Optional) Default value for the parameter if not provided
     """
 
@@ -34,6 +36,8 @@ class ToolParameter(BaseModel):
     parameter_type: str
     description: str
     required: bool = Field(default=True)
+    items: dict | None = None
+    title: str | None = None
     default: Any | None = None
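
For illustration, here is how the two new fields might be populated on an array-typed parameter. The surrounding values and the import path are assumptions for the sketch; only `items` and `title` come from this diff:

```python
# Hypothetical usage of the new fields; ToolParameter is the model shown above.
from llama_stack.apis.tools import ToolParameter  # import path assumed

tags_param = ToolParameter(
    name="tags",
    parameter_type="array",
    description="Labels to attach to the resource",
    required=False,
    items={"type": "string"},  # element schema, used when parameter_type is "array"
    title="Tags",
)
```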


@@ -20,8 +20,6 @@ from llama_stack.apis.common.content_types import (
 )
 from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
 from llama_stack.apis.inference import (
-    BatchChatCompletionResponse,
-    BatchCompletionResponse,
     ChatCompletionResponse,
     ChatCompletionResponseEventType,
     ChatCompletionResponseStreamChunk,
@@ -273,30 +271,6 @@ class InferenceRouter(Inference):
         )
         return response
 
-    async def batch_chat_completion(
-        self,
-        model_id: str,
-        messages_batch: list[list[Message]],
-        tools: list[ToolDefinition] | None = None,
-        tool_config: ToolConfig | None = None,
-        sampling_params: SamplingParams | None = None,
-        response_format: ResponseFormat | None = None,
-        logprobs: LogProbConfig | None = None,
-    ) -> BatchChatCompletionResponse:
-        logger.debug(
-            f"InferenceRouter.batch_chat_completion: {model_id=}, {len(messages_batch)=}, {sampling_params=}, {response_format=}, {logprobs=}",
-        )
-        provider = await self.routing_table.get_provider_impl(model_id)
-        return await provider.batch_chat_completion(
-            model_id=model_id,
-            messages_batch=messages_batch,
-            tools=tools,
-            tool_config=tool_config,
-            sampling_params=sampling_params,
-            response_format=response_format,
-            logprobs=logprobs,
-        )
-
     async def completion(
         self,
         model_id: str,
@@ -338,20 +312,6 @@ class InferenceRouter(Inference):
         return response
 
-    async def batch_completion(
-        self,
-        model_id: str,
-        content_batch: list[InterleavedContent],
-        sampling_params: SamplingParams | None = None,
-        response_format: ResponseFormat | None = None,
-        logprobs: LogProbConfig | None = None,
-    ) -> BatchCompletionResponse:
-        logger.debug(
-            f"InferenceRouter.batch_completion: {model_id=}, {len(content_batch)=}, {sampling_params=}, {response_format=}, {logprobs=}",
-        )
-        provider = await self.routing_table.get_provider_impl(model_id)
-        return await provider.batch_completion(model_id, content_batch, sampling_params, response_format, logprobs)
-
     async def embeddings(
         self,
         model_id: str,


@@ -14,7 +14,6 @@ from typing import Any
 import yaml
 
 from llama_stack.apis.agents import Agents
-from llama_stack.apis.batch_inference import BatchInference
 from llama_stack.apis.benchmarks import Benchmarks
 from llama_stack.apis.datasetio import DatasetIO
 from llama_stack.apis.datasets import Datasets
@@ -54,7 +53,6 @@ class LlamaStack(
     Providers,
     VectorDBs,
     Inference,
-    BatchInference,
     Agents,
     Safety,
     SyntheticDataGeneration,


@@ -92,6 +92,8 @@ class ToolParamDefinition(BaseModel):
     param_type: str
     description: str | None = None
     required: bool | None = True
+    items: Any | None = None
+    title: str | None = None
     default: Any | None = None


@@ -798,6 +798,8 @@ class ChatAgent(ShieldRunnerMixin):
                     param_type=param.parameter_type,
                     description=param.description,
                     required=param.required,
+                    items=param.items,
+                    title=param.title,
                     default=param.default,
                 )
                 for param in tool_def.parameters
@@ -841,6 +843,8 @@ class ChatAgent(ShieldRunnerMixin):
                     param_type=param.parameter_type,
                     description=param.description,
                     required=param.required,
+                    items=param.items,
+                    title=param.title,
                     default=param.default,
                 )
                 for param in tool_def.parameters
@@ -920,7 +924,7 @@ async def get_raw_document_text(document: Document) -> str:
             DeprecationWarning,
             stacklevel=2,
         )
-    elif not (document.mime_type.startswith("text/") or document.mime_type == "application/yaml"):
+    elif not (document.mime_type.startswith("text/") or document.mime_type in ("application/yaml", "application/json")):
         raise ValueError(f"Unexpected document mime type: {document.mime_type}")
 
     if isinstance(document.content, URL):
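
The guard change above boils down to a slightly wider allow-list. Rephrased standalone for clarity (a sketch, not an actual helper in this file):

```python
def _mime_type_is_supported(mime_type: str) -> bool:
    # text/* was always accepted; YAML was already special-cased, JSON is new here.
    return mime_type.startswith("text/") or mime_type in ("application/yaml", "application/json")
```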


@@ -568,6 +568,7 @@ class StreamingResponseOrchestrator:
                     description=param.description,
                     required=param.required,
                     default=param.default,
+                    items=param.items,
                 )
                 for param in t.parameters
             },


@@ -18,8 +18,6 @@ from llama_stack.apis.common.content_types import (
     ToolCallParseStatus,
 )
 from llama_stack.apis.inference import (
-    BatchChatCompletionResponse,
-    BatchCompletionResponse,
     ChatCompletionRequest,
     ChatCompletionResponse,
     ChatCompletionResponseEvent,
@@ -219,41 +217,6 @@ class MetaReferenceInferenceImpl(
         results = await self._nonstream_completion([request])
         return results[0]
 
-    async def batch_completion(
-        self,
-        model_id: str,
-        content_batch: list[InterleavedContent],
-        sampling_params: SamplingParams | None = None,
-        response_format: ResponseFormat | None = None,
-        stream: bool | None = False,
-        logprobs: LogProbConfig | None = None,
-    ) -> BatchCompletionResponse:
-        if sampling_params is None:
-            sampling_params = SamplingParams()
-        if logprobs:
-            assert logprobs.top_k == 1, f"Unexpected top_k={logprobs.top_k}"
-
-        content_batch = [
-            augment_content_with_response_format_prompt(response_format, content) for content in content_batch
-        ]
-
-        request_batch = []
-        for content in content_batch:
-            request = CompletionRequest(
-                model=model_id,
-                content=content,
-                sampling_params=sampling_params,
-                response_format=response_format,
-                stream=stream,
-                logprobs=logprobs,
-            )
-            self.check_model(request)
-            request = await convert_request_to_raw(request)
-            request_batch.append(request)
-
-        results = await self._nonstream_completion(request_batch)
-        return BatchCompletionResponse(batch=results)
-
     async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator:
         tokenizer = self.generator.formatter.tokenizer
@@ -399,49 +362,6 @@ class MetaReferenceInferenceImpl(
         results = await self._nonstream_chat_completion([request])
         return results[0]
 
-    async def batch_chat_completion(
-        self,
-        model_id: str,
-        messages_batch: list[list[Message]],
-        sampling_params: SamplingParams | None = None,
-        response_format: ResponseFormat | None = None,
-        tools: list[ToolDefinition] | None = None,
-        stream: bool | None = False,
-        logprobs: LogProbConfig | None = None,
-        tool_config: ToolConfig | None = None,
-    ) -> BatchChatCompletionResponse:
-        if sampling_params is None:
-            sampling_params = SamplingParams()
-        if logprobs:
-            assert logprobs.top_k == 1, f"Unexpected top_k={logprobs.top_k}"
-
-        # wrapper request to make it easier to pass around (internal only, not exposed to API)
-        request_batch = []
-        for messages in messages_batch:
-            request = ChatCompletionRequest(
-                model=model_id,
-                messages=messages,
-                sampling_params=sampling_params,
-                tools=tools or [],
-                response_format=response_format,
-                logprobs=logprobs,
-                tool_config=tool_config or ToolConfig(),
-            )
-            self.check_model(request)
-
-            # augment and rewrite messages depending on the model
-            request.messages = chat_completion_request_to_messages(request, self.llama_model.core_model_id.value)
-            # download media and convert to raw content so we can send it to the model
-            request = await convert_request_to_raw(request)
-            request_batch.append(request)
-
-        if self.config.create_distributed_process_group:
-            if SEMAPHORE.locked():
-                raise RuntimeError("Only one concurrent request is supported")
-
-        results = await self._nonstream_chat_completion(request_batch)
-        return BatchChatCompletionResponse(batch=results)
-
     async def _nonstream_chat_completion(
         self, request_batch: list[ChatCompletionRequest]
     ) -> list[ChatCompletionResponse]:


@@ -61,6 +61,7 @@ logger = get_logger(name=__name__, category="inference::fireworks")
 class FireworksInferenceAdapter(OpenAIMixin, ModelRegistryHelper, Inference, NeedsRequestProviderData):
     embedding_model_metadata = {
         "nomic-ai/nomic-embed-text-v1.5": {"embedding_dimension": 768, "context_length": 8192},
+        "accounts/fireworks/models/qwen3-embedding-8b": {"embedding_dimension": 4096, "context_length": 40960},
     }
 
     def __init__(self, config: FireworksImplConfig) -> None:


@@ -6,8 +6,7 @@
 
 import asyncio
-import base64
-from collections.abc import AsyncGenerator, AsyncIterator
+from collections.abc import AsyncGenerator
 from typing import Any
 
 from ollama import AsyncClient as AsyncOllamaClient
@@ -33,10 +32,6 @@ from llama_stack.apis.inference import (
     JsonSchemaResponseFormat,
     LogProbConfig,
     Message,
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
@@ -62,7 +57,6 @@ from llama_stack.providers.utils.inference.openai_compat import (
OpenAICompatCompletionChoice,
OpenAICompatCompletionResponse,
get_sampling_options,
-prepare_openai_completion_params,
process_chat_completion_response,
process_chat_completion_stream_response,
process_completion_response,
@@ -75,7 +69,6 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
content_has_media,
convert_image_content_to_url,
interleaved_content_as_str,
-localize_image_content,
request_has_media,
)
@@ -84,6 +77,7 @@ logger = get_logger(name=__name__, category="inference::ollama")
class OllamaInferenceAdapter(
OpenAIMixin,
+ModelRegistryHelper,
InferenceProvider,
ModelsProtocolPrivate,
):
@@ -129,6 +123,8 @@ class OllamaInferenceAdapter(
],
)
self.config = config
+# Ollama does not support image urls, so we need to download the image and convert it to base64
+self.download_images = True
self._clients: dict[asyncio.AbstractEventLoop, AsyncOllamaClient] = {}
@property
@@ -173,9 +169,6 @@ class OllamaInferenceAdapter(
async def shutdown(self) -> None:
self._clients.clear()
-async def unregister_model(self, model_id: str) -> None:
-pass
async def _get_model(self, model_id: str) -> Model:
if not self.model_store:
raise ValueError("Model store not set")
@@ -403,75 +396,6 @@ class OllamaInferenceAdapter(
raise UnsupportedModelError(model.provider_model_id, list(self._model_cache.keys()))
async def openai_chat_completion(
self,
model: str,
messages: list[OpenAIMessageParam],
frequency_penalty: float | None = None,
function_call: str | dict[str, Any] | None = None,
functions: list[dict[str, Any]] | None = None,
logit_bias: dict[str, float] | None = None,
logprobs: bool | None = None,
max_completion_tokens: int | None = None,
max_tokens: int | None = None,
n: int | None = None,
parallel_tool_calls: bool | None = None,
presence_penalty: float | None = None,
response_format: OpenAIResponseFormatParam | None = None,
seed: int | None = None,
stop: str | list[str] | None = None,
stream: bool | None = None,
stream_options: dict[str, Any] | None = None,
temperature: float | None = None,
tool_choice: str | dict[str, Any] | None = None,
tools: list[dict[str, Any]] | None = None,
top_logprobs: int | None = None,
top_p: float | None = None,
user: str | None = None,
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
model_obj = await self._get_model(model)
# Ollama does not support image urls, so we need to download the image and convert it to base64
async def _convert_message(m: OpenAIMessageParam) -> OpenAIMessageParam:
if isinstance(m.content, list):
for c in m.content:
if c.type == "image_url" and c.image_url and c.image_url.url:
localize_result = await localize_image_content(c.image_url.url)
if localize_result is None:
raise ValueError(f"Failed to localize image content from {c.image_url.url}")
content, format = localize_result
c.image_url.url = f"data:image/{format};base64,{base64.b64encode(content).decode('utf-8')}"
return m
messages = [await _convert_message(m) for m in messages]
params = await prepare_openai_completion_params(
model=model_obj.provider_resource_id,
messages=messages,
frequency_penalty=frequency_penalty,
function_call=function_call,
functions=functions,
logit_bias=logit_bias,
logprobs=logprobs,
max_completion_tokens=max_completion_tokens,
max_tokens=max_tokens,
n=n,
parallel_tool_calls=parallel_tool_calls,
presence_penalty=presence_penalty,
response_format=response_format,
seed=seed,
stop=stop,
stream=stream,
stream_options=stream_options,
temperature=temperature,
tool_choice=tool_choice,
tools=tools,
top_logprobs=top_logprobs,
top_p=top_p,
user=user,
)
return await OpenAIMixin.openai_chat_completion(self, **params)
async def convert_message_to_openai_dict_for_ollama(message: Message) -> list[dict]:
async def _convert_content(content) -> dict:

View file

@@ -21,8 +21,6 @@ logger = get_logger(name=__name__, category="inference::openai")
# | completion | LiteLLMOpenAIMixin |
# | chat_completion | LiteLLMOpenAIMixin |
# | embedding | LiteLLMOpenAIMixin |
-# | batch_completion | LiteLLMOpenAIMixin |
-# | batch_chat_completion | LiteLLMOpenAIMixin |
# | openai_completion | OpenAIMixin |
# | openai_chat_completion | OpenAIMixin |
# | openai_embeddings | OpenAIMixin |
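The table reads as a method-resolution map: each API lands on whichever mixin in the adapter's base-class list defines it first. A toy illustration of that dispatch (class names reused for clarity, bodies hypothetical):

class OpenAIMixin:
    def openai_chat_completion(self) -> str:
        return "handled by OpenAIMixin"


class LiteLLMOpenAIMixin:
    def chat_completion(self) -> str:
        return "handled by LiteLLMOpenAIMixin"


class Adapter(LiteLLMOpenAIMixin, OpenAIMixin):
    # no overrides: Python's MRO routes each call to the first base that defines it
    pass


a = Adapter()
print(a.chat_completion())         # handled by LiteLLMOpenAIMixin
print(a.openai_chat_completion())  # handled by OpenAIMixin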

View file

@@ -805,6 +805,10 @@ def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict:
properties[param_name].update(description=param.description)
if param.default:
properties[param_name].update(default=param.default)
+if param.items:
+properties[param_name].update(items=param.items)
+if param.title:
+properties[param_name].update(title=param.title)
if param.required:
required.append(param_name)
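The added fields extend the same copy-if-present rule that already handled description and default: optional JSON-schema keywords are forwarded into the OpenAI tool schema only when the parameter defines them. A small self-contained sketch of that rule (build_property and the SimpleNamespace parameter are illustrative, not the real types):

from types import SimpleNamespace

def build_property(param) -> dict:
    # copy-if-present mapping, mirroring convert_tooldef_to_openai_tool above
    prop: dict = {"type": param.parameter_type}
    if param.description:
        prop["description"] = param.description
    if param.default:
        prop["default"] = param.default
    if param.items:
        prop["items"] = param.items  # element schema for array parameters
    if param.title:
        prop["title"] = param.title
    return prop

param = SimpleNamespace(parameter_type="array", description="tags to match",
                        default=None, items={"type": "string"}, title="Tags")
print(build_property(param))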

View file

@@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+import base64
import uuid
from abc import ABC, abstractmethod
from collections.abc import AsyncIterator
@@ -26,6 +27,7 @@ from llama_stack.apis.models import ModelType
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
+from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content
logger = get_logger(name=__name__, category="providers::utils")
@@ -51,6 +53,10 @@ class OpenAIMixin(ModelRegistryHelper, ABC):
# This is useful for providers that do not return a unique id in the response.
overwrite_completion_id: bool = False
+# Allow subclasses to control whether to download images and convert to base64
+# for providers that require base64 encoded images instead of URLs.
+download_images: bool = False
# Embedding model metadata for this provider
# Can be set by subclasses or instances to provide embedding models
# Format: {"model_id": {"embedding_dimension": 1536, "context_length": 8192}}
@@ -239,6 +245,24 @@ class OpenAIMixin(ModelRegistryHelper, ABC):
"""
Direct OpenAI chat completion API call.
"""
+if self.download_images:
+async def _localize_image_url(m: OpenAIMessageParam) -> OpenAIMessageParam:
+if isinstance(m.content, list):
+for c in m.content:
+if c.type == "image_url" and c.image_url and c.image_url.url and "http" in c.image_url.url:
+localize_result = await localize_image_content(c.image_url.url)
+if localize_result is None:
+raise ValueError(
+f"Failed to localize image content from {c.image_url.url[:42]}{'...' if len(c.image_url.url) > 42 else ''}"
+)
+content, format = localize_result
+c.image_url.url = f"data:image/{format};base64,{base64.b64encode(content).decode('utf-8')}"
+# else it's a string and we don't need to modify it
+return m
+messages = [await _localize_image_url(m) for m in messages]
resp = await self.client.chat.completions.create(
**await prepare_openai_completion_params(
model=await self._get_provider_model_id(model),
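Since the localization step rewrites message parts in place, the transformation itself is easy to show in isolation. A dict-based sketch (hypothetical shapes, not the real OpenAIMessageParam types) of turning a remote image URL into the data URL the mixin substitutes:

import base64

def localize_part(part: dict, downloaded: bytes, fmt: str) -> dict:
    # hypothetical dict stand-in for the OpenAIMessageParam rewrite above
    if part.get("type") == "image_url" and part["image_url"]["url"].startswith("http"):
        encoded = base64.b64encode(downloaded).decode("utf-8")
        part["image_url"]["url"] = f"data:image/{fmt};base64,{encoded}"
    return part

part = {"type": "image_url", "image_url": {"url": "http://example.com/dog.png"}}
print(localize_part(part, b"\x89PNG\r\n", "png")["image_url"]["url"][:34])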

View file

@@ -192,6 +192,14 @@ async def localize_image_content(uri: str) -> tuple[bytes, str] | None:
format = "png"
return content, format
+elif uri.startswith("data"):
+# data:image/{format};base64,{data}
+match = re.match(r"data:image/(\w+);base64,(.+)", uri)
+if not match:
+raise ValueError(f"Invalid data URL format, {uri[:40]}...")
+fmt, image_data = match.groups()
+content = base64.b64decode(image_data)
+return content, fmt
else:
return None
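The new branch makes the helper a simple inverse of the encoding step: the regex splits a data URL into its format and base64 payload, and decoding the payload recovers the original bytes. A standalone round-trip using the same pattern as the hunk above:

import base64
import re

def parse_image_data_url(uri: str) -> tuple[bytes, str]:
    # same data:image/{format};base64,{data} pattern as the hunk above
    match = re.match(r"data:image/(\w+);base64,(.+)", uri)
    if not match:
        raise ValueError(f"Invalid data URL format, {uri[:40]}...")
    fmt, image_data = match.groups()
    return base64.b64decode(image_data), fmt

payload = base64.b64encode(b"\x89PNG\r\n").decode("utf-8")
content, fmt = parse_image_data_url(f"data:image/png;base64,{payload}")
assert fmt == "png" and content.startswith(b"\x89PNG")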

View file

@@ -120,6 +120,10 @@ async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefs
name=param_name,
parameter_type=param_schema.get("type", "string"),
description=param_schema.get("description", ""),
+required="default" not in param_schema,
+items=param_schema.get("items", None),
+title=param_schema.get("title", None),
+default=param_schema.get("default", None),
)
)
tools.append(
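One subtlety worth noting: a parameter is marked required exactly when its JSON schema carries no default. A tiny sketch of the mapping (describe_param is illustrative, not the real ToolParamDefinition constructor):

def describe_param(name: str, schema: dict) -> dict:
    # illustrative mapping; required is derived from the absence of a default
    return {
        "name": name,
        "parameter_type": schema.get("type", "string"),
        "description": schema.get("description", ""),
        "required": "default" not in schema,
        "items": schema.get("items"),
        "title": schema.get("title"),
        "default": schema.get("default"),
    }

print(describe_param("limit", {"type": "integer", "default": 10}))   # required: False
print(describe_param("query", {"type": "string", "title": "Query"}))  # required: True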

View file

@@ -28,7 +28,7 @@
"react-markdown": "^10.1.0",
"remark-gfm": "^4.0.1",
"remeda": "^2.32.0",
-"shiki": "^1.29.2",
+"shiki": "^3.13.0",
"sonner": "^2.0.7",
"tailwind-merge": "^3.3.1"
},
@@ -51,7 +51,7 @@
"prettier": "3.6.2",
"tailwindcss": "^4",
"ts-node": "^10.9.2",
-"tw-animate-css": "^1.2.9",
+"tw-animate-css": "^1.4.0",
"typescript": "^5"
}
},
@@ -3250,65 +3250,63 @@
"license": "MIT"
},
"node_modules/@shikijs/core": {
-"version": "1.29.2",
-"resolved": "https://registry.npmjs.org/@shikijs/core/-/core-1.29.2.tgz",
-"integrity": "sha512-vju0lY9r27jJfOY4Z7+Rt/nIOjzJpZ3y+nYpqtUZInVoXQ/TJZcfGnNOGnKjFdVZb8qexiCuSlZRKcGfhhTTZQ==",
+"version": "3.13.0",
+"resolved": "https://registry.npmjs.org/@shikijs/core/-/core-3.13.0.tgz",
+"integrity": "sha512-3P8rGsg2Eh2qIHekwuQjzWhKI4jV97PhvYjYUzGqjvJfqdQPz+nMlfWahU24GZAyW1FxFI1sYjyhfh5CoLmIUA==",
"license": "MIT",
"dependencies": {
-"@shikijs/engine-javascript": "1.29.2",
-"@shikijs/engine-oniguruma": "1.29.2",
-"@shikijs/types": "1.29.2",
-"@shikijs/vscode-textmate": "^10.0.1",
+"@shikijs/types": "3.13.0",
+"@shikijs/vscode-textmate": "^10.0.2",
"@types/hast": "^3.0.4",
-"hast-util-to-html": "^9.0.4"
+"hast-util-to-html": "^9.0.5"
}
},
"node_modules/@shikijs/engine-javascript": {
-"version": "1.29.2",
-"resolved": "https://registry.npmjs.org/@shikijs/engine-javascript/-/engine-javascript-1.29.2.tgz",
-"integrity": "sha512-iNEZv4IrLYPv64Q6k7EPpOCE/nuvGiKl7zxdq0WFuRPF5PAE9PRo2JGq/d8crLusM59BRemJ4eOqrFrC4wiQ+A==",
+"version": "3.13.0",
+"resolved": "https://registry.npmjs.org/@shikijs/engine-javascript/-/engine-javascript-3.13.0.tgz",
+"integrity": "sha512-Ty7xv32XCp8u0eQt8rItpMs6rU9Ki6LJ1dQOW3V/56PKDcpvfHPnYFbsx5FFUP2Yim34m/UkazidamMNVR4vKg==",
"license": "MIT",
"dependencies": {
-"@shikijs/types": "1.29.2",
-"@shikijs/vscode-textmate": "^10.0.1",
-"oniguruma-to-es": "^2.2.0"
+"@shikijs/types": "3.13.0",
+"@shikijs/vscode-textmate": "^10.0.2",
+"oniguruma-to-es": "^4.3.3"
}
},
"node_modules/@shikijs/engine-oniguruma": {
-"version": "1.29.2",
-"resolved": "https://registry.npmjs.org/@shikijs/engine-oniguruma/-/engine-oniguruma-1.29.2.tgz",
-"integrity": "sha512-7iiOx3SG8+g1MnlzZVDYiaeHe7Ez2Kf2HrJzdmGwkRisT7r4rak0e655AcM/tF9JG/kg5fMNYlLLKglbN7gBqA==",
+"version": "3.13.0",
+"resolved": "https://registry.npmjs.org/@shikijs/engine-oniguruma/-/engine-oniguruma-3.13.0.tgz",
+"integrity": "sha512-O42rBGr4UDSlhT2ZFMxqM7QzIU+IcpoTMzb3W7AlziI1ZF7R8eS2M0yt5Ry35nnnTX/LTLXFPUjRFCIW+Operg==",
"license": "MIT",
"dependencies": {
-"@shikijs/types": "1.29.2",
-"@shikijs/vscode-textmate": "^10.0.1"
+"@shikijs/types": "3.13.0",
+"@shikijs/vscode-textmate": "^10.0.2"
}
},
"node_modules/@shikijs/langs": {
-"version": "1.29.2",
-"resolved": "https://registry.npmjs.org/@shikijs/langs/-/langs-1.29.2.tgz",
-"integrity": "sha512-FIBA7N3LZ+223U7cJDUYd5shmciFQlYkFXlkKVaHsCPgfVLiO+e12FmQE6Tf9vuyEsFe3dIl8qGWKXgEHL9wmQ==",
+"version": "3.13.0",
+"resolved": "https://registry.npmjs.org/@shikijs/langs/-/langs-3.13.0.tgz",
+"integrity": "sha512-672c3WAETDYHwrRP0yLy3W1QYB89Hbpj+pO4KhxK6FzIrDI2FoEXNiNCut6BQmEApYLfuYfpgOZaqbY+E9b8wQ==",
"license": "MIT",
"dependencies": {
-"@shikijs/types": "1.29.2"
+"@shikijs/types": "3.13.0"
}
},
"node_modules/@shikijs/themes": {
-"version": "1.29.2",
-"resolved": "https://registry.npmjs.org/@shikijs/themes/-/themes-1.29.2.tgz",
-"integrity": "sha512-i9TNZlsq4uoyqSbluIcZkmPL9Bfi3djVxRnofUHwvx/h6SRW3cwgBC5SML7vsDcWyukY0eCzVN980rqP6qNl9g==",
+"version": "3.13.0",
+"resolved": "https://registry.npmjs.org/@shikijs/themes/-/themes-3.13.0.tgz",
+"integrity": "sha512-Vxw1Nm1/Od8jyA7QuAenaV78BG2nSr3/gCGdBkLpfLscddCkzkL36Q5b67SrLLfvAJTOUzW39x4FHVCFriPVgg==",
"license": "MIT",
"dependencies": {
-"@shikijs/types": "1.29.2"
+"@shikijs/types": "3.13.0"
}
},
"node_modules/@shikijs/types": {
-"version": "1.29.2",
-"resolved": "https://registry.npmjs.org/@shikijs/types/-/types-1.29.2.tgz",
-"integrity": "sha512-VJjK0eIijTZf0QSTODEXCqinjBn0joAHQ+aPSBzrv4O2d/QSbsMw+ZeSRx03kV34Hy7NzUvV/7NqfYGRLrASmw==",
+"version": "3.13.0",
+"resolved": "https://registry.npmjs.org/@shikijs/types/-/types-3.13.0.tgz",
+"integrity": "sha512-oM9P+NCFri/mmQ8LoFGVfVyemm5Hi27330zuOBp0annwJdKH1kOLndw3zCtAVDehPLg9fKqoEx3Ht/wNZxolfw==",
"license": "MIT",
"dependencies": {
-"@shikijs/vscode-textmate": "^10.0.1",
+"@shikijs/vscode-textmate": "^10.0.2",
"@types/hast": "^3.0.4"
}
},
@@ -6084,12 +6082,6 @@
"dev": true,
"license": "MIT"
},
-"node_modules/emoji-regex-xs": {
-"version": "1.0.0",
-"resolved": "https://registry.npmjs.org/emoji-regex-xs/-/emoji-regex-xs-1.0.0.tgz",
-"integrity": "sha512-LRlerrMYoIDrT6jgpeZ2YYl/L8EulRTt5hQcYjy5AInh7HWXKimpqx68aknBFpGL2+/IcogTcaydJEgaTmOpDg==",
-"license": "MIT"
-},
"node_modules/encodeurl": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz",
@@ -11813,15 +11805,21 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
+"node_modules/oniguruma-parser": {
+"version": "0.12.1",
+"resolved": "https://registry.npmjs.org/oniguruma-parser/-/oniguruma-parser-0.12.1.tgz",
+"integrity": "sha512-8Unqkvk1RYc6yq2WBYRj4hdnsAxVze8i7iPfQr8e4uSP3tRv0rpZcbGUDvxfQQcdwHt/e9PrMvGCsa8OqG9X3w==",
+"license": "MIT"
+},
"node_modules/oniguruma-to-es": {
-"version": "2.3.0",
-"resolved": "https://registry.npmjs.org/oniguruma-to-es/-/oniguruma-to-es-2.3.0.tgz",
-"integrity": "sha512-bwALDxriqfKGfUufKGGepCzu9x7nJQuoRoAFp4AnwehhC2crqrDIAP/uN2qdlsAvSMpeRC3+Yzhqc7hLmle5+g==",
+"version": "4.3.3",
+"resolved": "https://registry.npmjs.org/oniguruma-to-es/-/oniguruma-to-es-4.3.3.tgz",
+"integrity": "sha512-rPiZhzC3wXwE59YQMRDodUwwT9FZ9nNBwQQfsd1wfdtlKEyCdRV0avrTcSZ5xlIvGRVPd/cx6ZN45ECmS39xvg==",
"license": "MIT",
"dependencies": {
-"emoji-regex-xs": "^1.0.0",
-"regex": "^5.1.1",
-"regex-recursion": "^5.1.1"
+"oniguruma-parser": "^0.12.1",
+"regex": "^6.0.1",
+"regex-recursion": "^6.0.2"
}
},
"node_modules/openid-client": {
@@ -12613,21 +12611,20 @@
}
},
"node_modules/regex": {
-"version": "5.1.1",
-"resolved": "https://registry.npmjs.org/regex/-/regex-5.1.1.tgz",
-"integrity": "sha512-dN5I359AVGPnwzJm2jN1k0W9LPZ+ePvoOeVMMfqIMFz53sSwXkxaJoxr50ptnsC771lK95BnTrVSZxq0b9yCGw==",
+"version": "6.0.1",
+"resolved": "https://registry.npmjs.org/regex/-/regex-6.0.1.tgz",
+"integrity": "sha512-uorlqlzAKjKQZ5P+kTJr3eeJGSVroLKoHmquUj4zHWuR+hEyNqlXsSKlYYF5F4NI6nl7tWCs0apKJ0lmfsXAPA==",
"license": "MIT",
"dependencies": {
"regex-utilities": "^2.3.0"
}
},
"node_modules/regex-recursion": {
-"version": "5.1.1",
-"resolved": "https://registry.npmjs.org/regex-recursion/-/regex-recursion-5.1.1.tgz",
-"integrity": "sha512-ae7SBCbzVNrIjgSbh7wMznPcQel1DNlDtzensnFxpiNpXt1U2ju/bHugH422r+4LAVS1FpW1YCwilmnNsjum9w==",
+"version": "6.0.2",
+"resolved": "https://registry.npmjs.org/regex-recursion/-/regex-recursion-6.0.2.tgz",
+"integrity": "sha512-0YCaSCq2VRIebiaUviZNs0cBz1kg5kVS2UKUfNIx8YVs1cN3AV7NTctO5FOKBA+UT2BPJIWZauYHPqJODG50cg==",
"license": "MIT",
"dependencies": {
-"regex": "^5.1.1",
"regex-utilities": "^2.3.0"
}
},
@@ -13165,18 +13162,18 @@
}
},
"node_modules/shiki": {
-"version": "1.29.2",
-"resolved": "https://registry.npmjs.org/shiki/-/shiki-1.29.2.tgz",
-"integrity": "sha512-njXuliz/cP+67jU2hukkxCNuH1yUi4QfdZZY+sMr5PPrIyXSu5iTb/qYC4BiWWB0vZ+7TbdvYUCeL23zpwCfbg==",
+"version": "3.13.0",
+"resolved": "https://registry.npmjs.org/shiki/-/shiki-3.13.0.tgz",
+"integrity": "sha512-aZW4l8Og16CokuCLf8CF8kq+KK2yOygapU5m3+hoGw0Mdosc6fPitjM+ujYarppj5ZIKGyPDPP1vqmQhr+5/0g==",
"license": "MIT",
"dependencies": {
-"@shikijs/core": "1.29.2",
-"@shikijs/engine-javascript": "1.29.2",
-"@shikijs/engine-oniguruma": "1.29.2",
-"@shikijs/langs": "1.29.2",
-"@shikijs/themes": "1.29.2",
-"@shikijs/types": "1.29.2",
-"@shikijs/vscode-textmate": "^10.0.1",
+"@shikijs/core": "3.13.0",
+"@shikijs/engine-javascript": "3.13.0",
+"@shikijs/engine-oniguruma": "3.13.0",
+"@shikijs/langs": "3.13.0",
+"@shikijs/themes": "3.13.0",
+"@shikijs/types": "3.13.0",
+"@shikijs/vscode-textmate": "^10.0.2",
"@types/hast": "^3.0.4"
}
},
@@ -13970,9 +13967,9 @@
"license": "0BSD"
},
"node_modules/tw-animate-css": {
-"version": "1.2.9",
-"resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.2.9.tgz",
-"integrity": "sha512-9O4k1at9pMQff9EAcCEuy1UNO43JmaPQvq+0lwza9Y0BQ6LB38NiMj+qHqjoQf40355MX+gs6wtlR6H9WsSXFg==",
+"version": "1.4.0",
+"resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.4.0.tgz",
+"integrity": "sha512-7bziOlRqH0hJx80h/3mbicLW7o8qLsH5+RaLR2t+OHM3D0JlWGODQKQ4cxbK7WlvmUxpcj6Kgu6EKqjrGFe3QQ==",
"dev": true,
"license": "MIT",
"funding": {

View file

@@ -33,7 +33,7 @@
"react-markdown": "^10.1.0",
"remark-gfm": "^4.0.1",
"remeda": "^2.32.0",
-"shiki": "^1.29.2",
+"shiki": "^3.13.0",
"sonner": "^2.0.7",
"tailwind-merge": "^3.3.1"
},
@@ -56,7 +56,7 @@
"prettier": "3.6.2",
"tailwindcss": "^4",
"ts-node": "^10.9.2",
-"tw-animate-css": "^1.2.9",
+"tw-animate-css": "^1.4.0",
"typescript": "^5"
}
}

View file

@@ -167,6 +167,8 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
from starlette.responses import Response
from starlette.routing import Mount, Route
+from llama_stack.log import get_logger
server = FastMCP("FastMCP Test Server", log_level="WARNING")
tools = tools or default_tools()
@@ -211,6 +213,7 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
return sock.getsockname()[1]
port = get_open_port()
+logger = get_logger(__name__, category="tests::mcp")
# make uvicorn logs be less verbose
config = uvicorn.Config(app, host="0.0.0.0", port=port, log_level="warning")
@@ -218,10 +221,17 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
app.state.uvicorn_server = server_instance
def run_server():
+try:
+logger.info(f"Starting MCP server on port {port}")
server_instance.run()
+logger.info(f"MCP server on port {port} has stopped")
+except Exception as e:
+logger.error(f"MCP server failed to start on port {port}: {e}")
+raise
# Start the server in a new thread
server_thread = threading.Thread(target=run_server, daemon=True)
+logger.info(f"Starting MCP server thread on port {port}")
server_thread.start()
# Polling until the server is ready
@@ -229,24 +239,36 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
start_time = time.time()
server_url = f"http://localhost:{port}/sse"
+logger.info(f"Waiting for MCP server to be ready at {server_url}")
while time.time() - start_time < timeout:
try:
response = httpx.get(server_url)
if response.status_code in [200, 401]:
+logger.info(f"MCP server is ready on port {port} (status: {response.status_code})")
break
-except httpx.RequestError:
+except httpx.RequestError as e:
+logger.debug(f"Server not ready yet, retrying... ({e})")
pass
time.sleep(0.1)
+else:
+# If we exit the loop due to timeout
+logger.error(f"MCP server failed to start within {timeout} seconds on port {port}")
+logger.error(f"Thread alive: {server_thread.is_alive()}")
+if server_thread.is_alive():
+logger.error("Server thread is still running but not responding to HTTP requests")
try:
yield {"server_url": server_url}
finally:
+logger.info(f"Shutting down MCP server on port {port}")
server_instance.should_exit = True
time.sleep(0.5)
# Force shutdown if still running
if server_thread.is_alive():
try:
+logger.info("Force shutting down server thread")
if hasattr(server_instance, "servers") and server_instance.servers:
for srv in server_instance.servers:
srv.close()
@@ -254,9 +276,9 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
# Wait for graceful shutdown
server_thread.join(timeout=3)
if server_thread.is_alive():
-print("Warning: Server thread still alive after shutdown attempt")
+logger.warning("Server thread still alive after shutdown attempt")
except Exception as e:
-print(f"Error during server shutdown: {e}")
+logger.error(f"Error during server shutdown: {e}")
# CRITICAL: Reset SSE global state to prevent event loop contamination
# Reset the SSE AppStatus singleton that stores anyio.Event objects
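The readiness check is a common poll-until-up pattern: hit the endpoint repeatedly, treat 200 (ready) or 401 (up, but auth required) as success, and fall into the while/else branch on timeout. A standalone sketch of the same loop:

import time

import httpx

def wait_until_ready(url: str, timeout: float = 30.0) -> bool:
    # poll until the endpoint answers 200 (ready) or 401 (up but requires auth)
    start = time.time()
    while time.time() - start < timeout:
        try:
            if httpx.get(url).status_code in (200, 401):
                return True
        except httpx.RequestError:
            pass  # server not accepting connections yet; retry
        time.sleep(0.1)
    return False  # timed out, mirroring the while/else branch above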

View file

@@ -1,76 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import pytest
from ..test_cases.test_case import TestCase
def skip_if_provider_doesnt_support_batch_inference(client_with_models, model_id):
models = {m.identifier: m for m in client_with_models.models.list()}
models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
provider_id = models[model_id].provider_id
providers = {p.provider_id: p for p in client_with_models.providers.list()}
provider = providers[provider_id]
if provider.provider_type not in ("inline::meta-reference",):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support batch inference")
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:batch_completion",
],
)
def test_batch_completion_non_streaming(client_with_models, text_model_id, test_case):
skip_if_provider_doesnt_support_batch_inference(client_with_models, text_model_id)
tc = TestCase(test_case)
content_batch = tc["contents"]
response = client_with_models.inference.batch_completion(
content_batch=content_batch,
model_id=text_model_id,
sampling_params={
"max_tokens": 50,
},
)
assert len(response.batch) == len(content_batch)
for i, r in enumerate(response.batch):
print(f"response {i}: {r.content}")
assert len(r.content) > 10
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:batch_completion",
],
)
def test_batch_chat_completion_non_streaming(client_with_models, text_model_id, test_case):
skip_if_provider_doesnt_support_batch_inference(client_with_models, text_model_id)
tc = TestCase(test_case)
qa_pairs = tc["qa_pairs"]
message_batch = [
[
{
"role": "user",
"content": qa["question"],
}
]
for qa in qa_pairs
]
response = client_with_models.inference.batch_chat_completion(
messages_batch=message_batch,
model_id=text_model_id,
)
assert len(response.batch) == len(qa_pairs)
for i, r in enumerate(response.batch):
print(f"response {i}: {r.completion_message.content}")
assert len(r.completion_message.content) > 0
assert qa_pairs[i]["answer"].lower() in r.completion_message.content.lower()

View file

@@ -1,303 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
#
# Test plan:
#
# Types of input:
# - array of a string
# - array of a image (ImageContentItem, either URL or base64 string)
# - array of a text (TextContentItem)
# Types of output:
# - list of list of floats
# Params:
# - text_truncation
# - absent w/ long text -> error
# - none w/ long text -> error
# - absent w/ short text -> ok
# - none w/ short text -> ok
# - end w/ long text -> ok
# - end w/ short text -> ok
# - start w/ long text -> ok
# - start w/ short text -> ok
# - output_dimension
# - response dimension matches
# - task_type, only for asymmetric models
# - query embedding != passage embedding
# Negative:
# - long string
# - long text
#
# Todo:
# - negative tests
# - empty
# - empty list
# - empty string
# - empty text
# - empty image
# - long
# - large image
# - appropriate combinations
# - batch size
# - many inputs
# - invalid
# - invalid URL
# - invalid base64
#
# Notes:
# - use llama_stack_client fixture
# - use pytest.mark.parametrize when possible
# - no accuracy tests: only check the type of output, not the content
#
import pytest
from llama_stack_client import BadRequestError as LlamaStackBadRequestError
from llama_stack_client.types import EmbeddingsResponse
from llama_stack_client.types.shared.interleaved_content import (
ImageContentItem,
ImageContentItemImage,
ImageContentItemImageURL,
TextContentItem,
)
from openai import BadRequestError as OpenAIBadRequestError
from llama_stack.core.library_client import LlamaStackAsLibraryClient
DUMMY_STRING = "hello"
DUMMY_STRING2 = "world"
DUMMY_LONG_STRING = "NVDA " * 10240
DUMMY_TEXT = TextContentItem(text=DUMMY_STRING, type="text")
DUMMY_TEXT2 = TextContentItem(text=DUMMY_STRING2, type="text")
DUMMY_LONG_TEXT = TextContentItem(text=DUMMY_LONG_STRING, type="text")
# TODO(mf): add a real image URL and base64 string
DUMMY_IMAGE_URL = ImageContentItem(
image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image"
)
DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image")
SUPPORTED_PROVIDERS = {"remote::nvidia"}
MODELS_SUPPORTING_MEDIA = {}
MODELS_SUPPORTING_OUTPUT_DIMENSION = {"nvidia/llama-3.2-nv-embedqa-1b-v2"}
MODELS_REQUIRING_TASK_TYPE = {
"nvidia/llama-3.2-nv-embedqa-1b-v2",
"nvidia/nv-embedqa-e5-v5",
"nvidia/nv-embedqa-mistral-7b-v2",
"snowflake/arctic-embed-l",
}
MODELS_SUPPORTING_TASK_TYPE = MODELS_REQUIRING_TASK_TYPE
def default_task_type(model_id):
"""
Some models require a task type parameter. This provides a default value for
testing those models.
"""
if model_id in MODELS_REQUIRING_TASK_TYPE:
return {"task_type": "query"}
return {}
@pytest.mark.parametrize(
"contents",
[
[DUMMY_STRING, DUMMY_STRING2],
[DUMMY_TEXT, DUMMY_TEXT2],
],
ids=[
"list[string]",
"list[text]",
],
)
def test_embedding_text(llama_stack_client, embedding_model_id, contents, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"contents",
[
[DUMMY_IMAGE_URL, DUMMY_IMAGE_BASE64],
[DUMMY_IMAGE_URL, DUMMY_STRING, DUMMY_IMAGE_BASE64, DUMMY_TEXT],
],
ids=[
"list[url,base64]",
"list[url,string,base64,text]",
],
)
def test_embedding_image(llama_stack_client, embedding_model_id, contents, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
if embedding_model_id not in MODELS_SUPPORTING_MEDIA:
pytest.xfail(f"{embedding_model_id} doesn't support media")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"text_truncation",
[
"end",
"start",
],
)
@pytest.mark.parametrize(
"contents",
[
[DUMMY_LONG_TEXT],
[DUMMY_STRING],
],
ids=[
"long",
"short",
],
)
def test_embedding_truncation(
llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=contents,
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == 1
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"text_truncation",
[
None,
"none",
],
)
@pytest.mark.parametrize(
"contents",
[
[DUMMY_LONG_TEXT],
[DUMMY_LONG_STRING],
],
ids=[
"long-text",
"long-str",
],
)
def test_embedding_truncation_error(
llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
# Using LlamaStackClient from llama_stack_client will raise llama_stack_client.BadRequestError
# While using LlamaStackAsLibraryClient from llama_stack.core.library_client will raise the error that the backend raises
error_type = (
OpenAIBadRequestError
if isinstance(llama_stack_client, LlamaStackAsLibraryClient)
else LlamaStackBadRequestError
)
with pytest.raises(error_type):
llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_LONG_TEXT],
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)
def test_embedding_output_dimension(llama_stack_client, embedding_model_id, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
if embedding_model_id not in MODELS_SUPPORTING_OUTPUT_DIMENSION:
pytest.xfail(f"{embedding_model_id} doesn't support output_dimension")
base_response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], **default_task_type(embedding_model_id)
)
test_response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_STRING],
**default_task_type(embedding_model_id),
output_dimension=32,
)
assert len(base_response.embeddings[0]) != len(test_response.embeddings[0])
assert len(test_response.embeddings[0]) == 32
def test_embedding_task_type(llama_stack_client, embedding_model_id, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
if embedding_model_id not in MODELS_SUPPORTING_TASK_TYPE:
pytest.xfail(f"{embedding_model_id} doesn't support task_type")
query_embedding = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query"
)
document_embedding = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="document"
)
assert query_embedding.embeddings != document_embedding.embeddings
@pytest.mark.parametrize(
"text_truncation",
[
None,
"none",
"end",
"start",
],
)
def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_STRING],
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == 1
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"text_truncation",
[
"NONE",
"END",
"START",
"left",
"right",
],
)
def test_embedding_text_truncation_error(
llama_stack_client, embedding_model_id, text_truncation, inference_provider_type
):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
error_type = ValueError if isinstance(llama_stack_client, LlamaStackAsLibraryClient) else LlamaStackBadRequestError
with pytest.raises(error_type):
llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_STRING],
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)

View file

@@ -9,6 +9,7 @@ import time
import unicodedata
import pytest
+from pydantic import BaseModel
from ..test_cases.test_case import TestCase
@@ -62,6 +63,14 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
+def skip_if_doesnt_support_completions_logprobs(client_with_models, model_id):
+provider_type = provider_from_model(client_with_models, model_id).provider_type
+if provider_type in (
+"remote::ollama", # logprobs is ignored
+):
+pytest.skip(f"Model {model_id} hosted by {provider_type} doesn't support /v1/completions logprobs.")
def skip_if_model_doesnt_support_suffix(client_with_models, model_id):
# To test `fim` ( fill in the middle ) completion, we need to use a model that supports suffix.
# Use this to specifically test this API functionality.
@@ -205,28 +214,6 @@ def test_openai_completion_streaming(llama_stack_client, client_with_models, tex
assert len(content_str) > 10
@pytest.mark.parametrize(
"prompt_logprobs",
[
1,
0,
],
)
def test_openai_completion_prompt_logprobs(llama_stack_client, client_with_models, text_model_id, prompt_logprobs):
skip_if_provider_isnt_vllm(client_with_models, text_model_id)
prompt = "Hello, world!"
response = llama_stack_client.completions.create(
model=text_model_id,
prompt=prompt,
stream=False,
prompt_logprobs=prompt_logprobs,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert len(choice.prompt_logprobs) > 0
def test_openai_completion_guided_choice(llama_stack_client, client_with_models, text_model_id):
skip_if_provider_isnt_vllm(client_with_models, text_model_id)
@@ -518,3 +505,214 @@ def test_openai_chat_completion_non_streaming_with_file(openai_client, client_wi
message_content = response.choices[0].message.content.lower().strip()
normalized_content = _normalize_text(message_content)
assert "hello world" in normalized_content
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:stop_sequence",
],
)
def test_openai_completion_stop_sequence(client_with_models, openai_client, text_model_id, test_case):
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
stop="1963",
stream=False,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert "1963" not in choice.text
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
stop=["blathering", "1963"],
stream=False,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert "1963" not in choice.text
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_openai_completion_logprobs(client_with_models, openai_client, text_model_id, test_case):
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
skip_if_doesnt_support_completions_logprobs(client_with_models, text_model_id)
tc = TestCase(test_case)
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
logprobs=5,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert choice.text, "Response text should not be empty"
assert choice.logprobs, "Logprobs should not be empty"
logprobs = choice.logprobs
assert logprobs.token_logprobs, "Response tokens should not be empty"
assert len(logprobs.tokens) == len(logprobs.token_logprobs)
assert len(logprobs.token_logprobs) == len(logprobs.top_logprobs)
for i, (token, prob) in enumerate(zip(logprobs.tokens, logprobs.token_logprobs, strict=True)):
assert logprobs.top_logprobs[i][token] == prob
assert len(logprobs.top_logprobs[i]) == 5
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_openai_completion_logprobs_streaming(client_with_models, openai_client, text_model_id, test_case):
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
skip_if_doesnt_support_completions_logprobs(client_with_models, text_model_id)
tc = TestCase(test_case)
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
logprobs=3,
stream=True,
max_tokens=5,
)
for chunk in response:
choice = chunk.choices[0]
if choice.text: # if there's a token, we expect logprobs
assert choice.logprobs, "Logprobs should not be empty"
logprobs = choice.logprobs
assert logprobs.token_logprobs, "Response tokens should not be empty"
assert len(logprobs.tokens) == len(logprobs.token_logprobs)
assert len(logprobs.token_logprobs) == len(logprobs.top_logprobs)
for i, (token, prob) in enumerate(zip(logprobs.tokens, logprobs.token_logprobs, strict=True)):
assert logprobs.top_logprobs[i][token] == prob
assert len(logprobs.top_logprobs[i]) == 3
else: # no token, no logprobs
assert not choice.logprobs, "Logprobs should be empty"
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_openai_chat_completion_with_tools(openai_client, text_model_id, test_case):
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=False,
)
assert len(response.choices) == 1
assert len(response.choices[0].message.tool_calls) == 1
tool_call = response.choices[0].message.tool_calls[0]
assert tool_call.function.name == tc["tools"][0]["function"]["name"]
assert "location" in tool_call.function.arguments
assert tc["expected"]["location"] in tool_call.function.arguments
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_openai_chat_completion_with_tools_and_streaming(openai_client, text_model_id, test_case):
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=True,
)
# Accumulate tool calls from streaming chunks
tool_calls = []
for chunk in response:
if chunk.choices and chunk.choices[0].delta.tool_calls:
for i, tc_delta in enumerate(chunk.choices[0].delta.tool_calls):
while len(tool_calls) <= i:
tool_calls.append({"function": {"name": "", "arguments": ""}})
if tc_delta.function and tc_delta.function.name:
tool_calls[i]["function"]["name"] = tc_delta.function.name
if tc_delta.function and tc_delta.function.arguments:
tool_calls[i]["function"]["arguments"] += tc_delta.function.arguments
assert len(tool_calls) == 1
tool_call = tool_calls[0]
assert tool_call["function"]["name"] == tc["tools"][0]["function"]["name"]
assert "location" in tool_call["function"]["arguments"]
assert tc["expected"]["location"] in tool_call["function"]["arguments"]
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_openai_chat_completion_with_tool_choice_none(openai_client, text_model_id, test_case):
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="none",
stream=False,
)
assert len(response.choices) == 1
tool_calls = response.choices[0].message.tool_calls
assert tool_calls is None or len(tool_calls) == 0
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:structured_output",
],
)
def test_openai_chat_completion_structured_output(openai_client, text_model_id, test_case):
# Note: Skip condition may need adjustment for OpenAI client
class AnswerFormat(BaseModel):
first_name: str
last_name: str
year_of_birth: int
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
response_format={
"type": "json_schema",
"json_schema": {
"name": "AnswerFormat",
"schema": AnswerFormat.model_json_schema(),
},
},
stream=False,
)
print(response.choices[0].message.content)
answer = AnswerFormat.model_validate_json(response.choices[0].message.content)
expected = tc["expected"]
assert answer.first_name == expected["first_name"]
assert answer.last_name == expected["last_name"]
assert answer.year_of_birth == expected["year_of_birth"]

View file

@@ -0,0 +1,77 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import base64
import pathlib
import pytest
@pytest.fixture
def image_path():
return pathlib.Path(__file__).parent / "dog.png"
@pytest.fixture
def base64_image_data(image_path):
return base64.b64encode(image_path.read_bytes()).decode("utf-8")
async def test_openai_chat_completion_image_url(openai_client, vision_model_id):
message = {
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {
"url": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
},
},
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = openai_client.chat.completions.create(
model=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.choices[0].message.content.lower().strip()
assert len(message_content) > 0
assert any(expected in message_content for expected in {"dog", "puppy", "pup"})
async def test_openai_chat_completion_image_data(openai_client, vision_model_id, base64_image_data):
message = {
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{base64_image_data}",
},
},
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = openai_client.chat.completions.create(
model=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.choices[0].message.content.lower().strip()
assert len(message_content) > 0
assert any(expected in message_content for expected in {"dog", "puppy", "pup"})

View file

@@ -1,545 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from time import sleep
import pytest
from pydantic import BaseModel
from llama_stack.models.llama.sku_list import resolve_model
from ..test_cases.test_case import TestCase
PROVIDER_LOGPROBS_TOP_K = {"remote::together", "remote::fireworks", "remote::vllm"}
def skip_if_model_doesnt_support_completion(client_with_models, model_id):
models = {m.identifier: m for m in client_with_models.models.list()}
models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
provider_id = models[model_id].provider_id
providers = {p.provider_id: p for p in client_with_models.providers.list()}
provider = providers[provider_id]
if (
provider.provider_type
in (
"remote::openai",
"remote::anthropic",
"remote::gemini",
"remote::vertexai",
"remote::groq",
"remote::sambanova",
"remote::azure",
)
or "openai-compat" in provider.provider_type
):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support completion")
def skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, model_id):
models = {m.identifier: m for m in client_with_models.models.list()}
models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
provider_id = models[model_id].provider_id
providers = {p.provider_id: p for p in client_with_models.providers.list()}
provider = providers[provider_id]
if provider.provider_type in ("remote::sambanova", "remote::azure", "remote::watsonx"):
pytest.skip(
f"Model {model_id} hosted by {provider.provider_type} doesn't support json_schema structured output"
)
def get_llama_model(client_with_models, model_id):
models = {}
for m in client_with_models.models.list():
models[m.identifier] = m
models[m.provider_resource_id] = m
assert model_id in models, f"Model {model_id} not found"
model = models[model_id]
ids = (model.identifier, model.provider_resource_id)
for mid in ids:
if resolve_model(mid):
return mid
return model.metadata.get("llama_model", None)
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:sanity",
],
)
def test_text_completion_non_streaming(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=False,
model_id=text_model_id,
sampling_params={
"max_tokens": 50,
},
)
assert len(response.content) > 10
# assert "blue" in response.content.lower().strip()
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:sanity",
],
)
def test_text_completion_streaming(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=True,
model_id=text_model_id,
sampling_params={
"max_tokens": 50,
},
)
streamed_content = [chunk.delta for chunk in response]
content_str = "".join(streamed_content).lower().strip()
# assert "blue" in content_str
assert len(content_str) > 10
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:stop_sequence",
],
)
def test_text_completion_stop_sequence(client_with_models, text_model_id, inference_provider_type, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
# This is only supported/tested for remote vLLM: https://github.com/meta-llama/llama-stack/issues/1771
if inference_provider_type != "remote::vllm":
pytest.xfail(f"{inference_provider_type} doesn't support 'stop' parameter yet")
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=True,
model_id=text_model_id,
sampling_params={
"max_tokens": 50,
"stop": ["1963"],
},
)
streamed_content = [chunk.delta for chunk in response]
content_str = "".join(streamed_content).lower().strip()
assert "1963" not in content_str
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_text_completion_log_probs_non_streaming(client_with_models, text_model_id, inference_provider_type, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
if inference_provider_type not in PROVIDER_LOGPROBS_TOP_K:
pytest.xfail(f"{inference_provider_type} doesn't support log probs yet")
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=False,
model_id=text_model_id,
sampling_params={
"max_tokens": 5,
},
logprobs={
"top_k": 1,
},
)
assert response.logprobs, "Logprobs should not be empty"
assert 1 <= len(response.logprobs) <= 5 # each token has 1 logprob and here max_tokens=5
assert all(len(logprob.logprobs_by_token) == 1 for logprob in response.logprobs)
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_text_completion_log_probs_streaming(client_with_models, text_model_id, inference_provider_type, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
if inference_provider_type not in PROVIDER_LOGPROBS_TOP_K:
pytest.xfail(f"{inference_provider_type} doesn't support log probs yet")
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=True,
model_id=text_model_id,
sampling_params={
"max_tokens": 5,
},
logprobs={
"top_k": 1,
},
)
streamed_content = list(response)
for chunk in streamed_content:
if chunk.delta: # if there's a token, we expect logprobs
assert chunk.logprobs, "Logprobs should not be empty"
assert all(len(logprob.logprobs_by_token) == 1 for logprob in chunk.logprobs)
else: # no token, no logprobs
assert not chunk.logprobs, "Logprobs should be empty"
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:structured_output",
],
)
def test_text_completion_structured_output(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, text_model_id)
class AnswerFormat(BaseModel):
name: str
year_born: str
year_retired: str
tc = TestCase(test_case)
user_input = tc["user_input"]
response = client_with_models.inference.completion(
model_id=text_model_id,
content=user_input,
stream=False,
sampling_params={
"max_tokens": 50,
},
response_format={
"type": "json_schema",
"json_schema": AnswerFormat.model_json_schema(),
},
)
answer = AnswerFormat.model_validate_json(response.content)
expected = tc["expected"]
assert answer.name == expected["name"]
assert answer.year_born == expected["year_born"]
assert answer.year_retired == expected["year_retired"]
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:non_streaming_01",
"inference:chat_completion:non_streaming_02",
],
)
def test_text_chat_completion_non_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
question = tc["question"]
expected = tc["expected"]
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=[
{
"role": "user",
"content": question,
}
],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
assert len(message_content) > 0
assert expected.lower() in message_content
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:streaming_01",
"inference:chat_completion:streaming_02",
],
)
def test_text_chat_completion_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
question = tc["question"]
expected = tc["expected"]
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=[{"role": "user", "content": question}],
stream=True,
timeout=120, # Increase timeout to 2 minutes for large conversation history
)
streamed_content = [str(chunk.event.delta.text.lower().strip()) for chunk in response]
assert len(streamed_content) > 0
assert expected.lower() in "".join(streamed_content)
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_calling_and_non_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=False,
)
# some models can return content for the response in addition to the tool call
assert response.completion_message.role == "assistant"
assert len(response.completion_message.tool_calls) == 1
assert response.completion_message.tool_calls[0].tool_name == tc["tools"][0]["tool_name"]
assert response.completion_message.tool_calls[0].arguments == tc["expected"]
# Will extract streamed text and separate it from tool invocation content
# The returned tool invocation content will be a string so it's easy to compare with expected value
# e.g. "[get_weather, {'location': 'San Francisco, CA'}]"
def extract_tool_invocation_content(response):
tool_invocation_content: str = ""
for chunk in response:
delta = chunk.event.delta
if delta.type == "tool_call" and delta.parse_status == "succeeded":
call = delta.tool_call
tool_invocation_content += f"[{call.tool_name}, {call.arguments}]"
return tool_invocation_content
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_calling_and_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=True,
)
tool_invocation_content = extract_tool_invocation_content(response)
expected_tool_name = tc["tools"][0]["tool_name"]
expected_argument = tc["expected"]
assert tool_invocation_content == f"[{expected_tool_name}, {expected_argument}]"
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_choice_required(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_config={
"tool_choice": "required",
},
stream=True,
)
tool_invocation_content = extract_tool_invocation_content(response)
expected_tool_name = tc["tools"][0]["tool_name"]
expected_argument = tc["expected"]
assert tool_invocation_content == f"[{expected_tool_name}, {expected_argument}]"
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_choice_none(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_config={"tool_choice": "none"},
stream=True,
)
tool_invocation_content = extract_tool_invocation_content(response)
assert tool_invocation_content == ""
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:structured_output",
],
)
def test_text_chat_completion_structured_output(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, text_model_id)
class NBAStats(BaseModel):
year_for_draft: int
num_seasons_in_nba: int
class AnswerFormat(BaseModel):
first_name: str
last_name: str
year_of_birth: int
nba_stats: NBAStats
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
response_format={
"type": "json_schema",
"json_schema": AnswerFormat.model_json_schema(),
},
stream=False,
)
answer = AnswerFormat.model_validate_json(response.completion_message.content)
expected = tc["expected"]
assert answer.first_name == expected["first_name"]
assert answer.last_name == expected["last_name"]
assert answer.year_of_birth == expected["year_of_birth"]
assert answer.nba_stats.num_seasons_in_nba == expected["num_seasons_in_nba"]
assert answer.nba_stats.year_for_draft == expected["year_for_draft"]
@pytest.mark.parametrize("streaming", [True, False])
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling_tools_absent",
],
)
def test_text_chat_completion_tool_calling_tools_not_in_request(
client_with_models, text_model_id, test_case, streaming
):
tc = TestCase(test_case)
# TODO: more dynamic lookup on tool_prompt_format for model family
tool_prompt_format = "json" if "3.1" in text_model_id else "python_list"
request = {
"model_id": text_model_id,
"messages": tc["messages"],
"tools": tc["tools"],
"tool_choice": "auto",
"tool_prompt_format": tool_prompt_format,
"stream": streaming,
}
response = client_with_models.inference.chat_completion(**request)
if streaming:
for chunk in response:
delta = chunk.event.delta
if delta.type == "tool_call" and delta.parse_status == "succeeded":
assert delta.tool_call.tool_name == "get_object_namespace_list"
if delta.type == "tool_call" and delta.parse_status == "failed":
# expect raw message that failed to parse in tool_call
assert isinstance(delta.tool_call, str)
assert len(delta.tool_call) > 0
else:
        for tool_call in response.completion_message.tool_calls:
            assert tool_call.tool_name == "get_object_namespace_list"
@pytest.mark.parametrize(
"test_case",
[
# Tests if the model can handle simple messages like "Hi" or
# a message unrelated to one of the tool calls
"inference:chat_completion:text_then_tool",
        # Tests if the model can complete a full tool call with responses correctly
        "inference:chat_completion:tool_then_answer",
        # Tests if the model can generate multiple params and
        # read outputs correctly
"inference:chat_completion:array_parameter",
],
)
def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, text_model_id, test_case):
"""This test tests the model's tool calling loop in various scenarios"""
if "llama-4" not in text_model_id.lower() and "llama4" not in text_model_id.lower():
pytest.xfail("Not tested for non-llama4 models yet")
tc = TestCase(test_case)
messages = []
    # keep going until either:
    # 1. there are new messages to test in the multi-turn exchange, or
    # 2. no new messages remain but the last message is a tool response
while len(tc["messages"]) > 0 or (len(messages) > 0 and messages[-1]["role"] == "tool"):
# do not take new messages if last message is tool response
if len(messages) == 0 or messages[-1]["role"] != "tool":
new_messages = tc["messages"].pop(0)
messages += new_messages
# pprint(messages)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=messages,
tools=tc["tools"],
stream=False,
sampling_params={
"strategy": {
"type": "top_p",
"top_p": 0.9,
"temperature": 0.6,
}
},
)
op_msg = response.completion_message
messages.append(op_msg.model_dump())
# print(op_msg)
assert op_msg.role == "assistant"
expected = tc["expected"].pop(0)
assert len(op_msg.tool_calls) == expected["num_tool_calls"]
if expected["num_tool_calls"] > 0:
assert op_msg.tool_calls[0].tool_name == expected["tool_name"]
assert op_msg.tool_calls[0].arguments == expected["tool_arguments"]
tool_response = tc["tool_responses"].pop(0)
messages.append(
# Tool Response Message
{
"role": "tool",
"call_id": op_msg.tool_calls[0].call_id,
"content": tool_response["response"],
}
)
else:
actual_answer = op_msg.content.lower()
# pprint(actual_answer)
assert expected["answer"] in actual_answer
# sleep to avoid rate limit
sleep(1)
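
# The loop above doubles as a reference for the tool-calling protocol these
# cases exercise. Below is a minimal sketch of the same driver loop, assuming
# a hypothetical `execute_tool` callback (not part of this test suite):
def run_tool_calling_turn(client, model_id, tools, messages, execute_tool):
    """Chat until the model stops requesting tools, then return its answer."""
    while True:
        response = client.inference.chat_completion(
            model_id=model_id,
            messages=messages,
            tools=tools,
            stream=False,
        )
        op_msg = response.completion_message
        messages.append(op_msg.model_dump())
        if not op_msg.tool_calls:
            return op_msg.content
        call = op_msg.tool_calls[0]
        # Feed the tool output back so the model can use it on the next pass.
        messages.append(
            {
                "role": "tool",
                "call_id": call.call_id,
                "content": execute_tool(call.tool_name, call.arguments),
            }
        )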

View file

@ -25,16 +25,19 @@ def base64_image_data(image_path):
     return base64.b64encode(image_path.read_bytes()).decode("utf-8")


+@pytest.fixture
+def base64_image_url(base64_image_data):
+    return f"data:image/png;base64,{base64_image_data}"
+
+
 def test_image_chat_completion_non_streaming(client_with_models, vision_model_id):
     message = {
         "role": "user",
         "content": [
             {
-                "type": "image",
-                "image": {
-                    "url": {
-                        "uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
-                    },
+                "type": "image_url",
+                "image_url": {
+                    "url": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
                 },
             },
             {
@ -43,12 +46,12 @@ def test_image_chat_completion_non_streaming(client_with_models, vision_model_id
             },
         ],
     }
-    response = client_with_models.inference.chat_completion(
-        model_id=vision_model_id,
+    response = client_with_models.chat.completions.create(
+        model=vision_model_id,
         messages=[message],
         stream=False,
     )
-    message_content = response.completion_message.content.lower().strip()
+    message_content = response.choices[0].message.content.lower().strip()
     assert len(message_content) > 0
     assert any(expected in message_content for expected in {"dog", "puppy", "pup"})
@ -68,8 +71,13 @@ def multi_image_data():
     return encoded_files


+@pytest.fixture
+def multi_image_url(multi_image_data):
+    return [f"data:image/jpeg;base64,{data}" for data in multi_image_data]
+
+
 @pytest.mark.parametrize("stream", [True, False])
-def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_data, stream):
+def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_url, stream):
     supported_models = ["llama-4", "gpt-4o", "llama4"]
     if not any(model in vision_model_id.lower() for model in supported_models):
         pytest.skip(
@ -81,15 +89,15 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
         "role": "user",
         "content": [
             {
-                "type": "image",
-                "image": {
-                    "data": multi_image_data[0],
+                "type": "image_url",
+                "image_url": {
+                    "url": multi_image_url[0],
                 },
             },
             {
-                "type": "image",
-                "image": {
-                    "data": multi_image_data[1],
+                "type": "image_url",
+                "image_url": {
+                    "url": multi_image_url[1],
                 },
             },
             {
@ -99,17 +107,17 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
             ],
         },
     ]
-    response = client_with_models.inference.chat_completion(
-        model_id=vision_model_id,
+    response = client_with_models.chat.completions.create(
+        model=vision_model_id,
         messages=messages,
         stream=stream,
     )
     if stream:
         message_content = ""
         for chunk in response:
-            message_content += chunk.event.delta.text
+            message_content += chunk.choices[0].delta.content
     else:
-        message_content = response.completion_message.content
+        message_content = response.choices[0].message.content
     assert len(message_content) > 0
     assert any(expected in message_content.lower().strip() for expected in {"bedroom"}), message_content
@ -125,17 +133,17 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
             "role": "user",
             "content": [
                 {
-                    "type": "image",
-                    "image": {
-                        "data": multi_image_data[2],
+                    "type": "image_url",
+                    "image_url": {
+                        "url": multi_image_url[2],
                     },
                 },
                 {"type": "text", "text": "How about this one?"},
             ],
         },
     )
-    response = client_with_models.inference.chat_completion(
-        model_id=vision_model_id,
+    response = client_with_models.chat.completions.create(
+        model=vision_model_id,
         messages=messages,
         stream=stream,
     )
@ -144,7 +152,7 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
         for chunk in response:
             message_content += chunk.event.delta.text
     else:
-        message_content = response.completion_message.content
+        message_content = response.choices[0].message.content
     assert len(message_content) > 0
     assert any(expected in message_content.lower().strip() for expected in {"sword", "shield"}), message_content
@ -154,11 +162,9 @@ def test_image_chat_completion_streaming(client_with_models, vision_model_id):
         "role": "user",
         "content": [
             {
-                "type": "image",
-                "image": {
-                    "url": {
-                        "uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
-                    },
+                "type": "image_url",
+                "image_url": {
+                    "url": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
                 },
             },
             {
@ -167,23 +173,23 @@ def test_image_chat_completion_streaming(client_with_models, vision_model_id):
             },
         ],
     }
-    response = client_with_models.inference.chat_completion(
-        model_id=vision_model_id,
+    response = client_with_models.chat.completions.create(
+        model=vision_model_id,
         messages=[message],
         stream=True,
     )
     streamed_content = ""
     for chunk in response:
-        streamed_content += chunk.event.delta.text.lower()
+        streamed_content += chunk.choices[0].delta.content.lower()
     assert len(streamed_content) > 0
     assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})


-def test_image_chat_completion_base64(client_with_models, vision_model_id, base64_image_data):
+def test_image_chat_completion_base64(client_with_models, vision_model_id, base64_image_url):
     image_spec = {
-        "type": "image",
-        "image": {
-            "data": base64_image_data,
+        "type": "image_url",
+        "image_url": {
+            "url": base64_image_url,
         },
     }
@ -197,10 +203,10 @@ def test_image_chat_completion_base64(client_with_models, vision_model_id, base6
             },
         ],
     }
-    response = client_with_models.inference.chat_completion(
-        model_id=vision_model_id,
+    response = client_with_models.chat.completions.create(
+        model=vision_model_id,
         messages=[message],
         stream=False,
     )
-    message_content = response.completion_message.content.lower().strip()
+    message_content = response.choices[0].message.content.lower().strip()
     assert len(message_content) > 0

View file

@ -14,6 +14,13 @@ from . import skip_in_github_actions
 # LLAMA_STACK_CONFIG="nvidia" pytest -v tests/integration/providers/nvidia/test_datastore.py


+@pytest.fixture(autouse=True)
+def skip_if_no_nvidia_provider(llama_stack_client):
+    provider_types = {p.provider_type for p in llama_stack_client.providers.list() if p.api == "datasetio"}
+    if "remote::nvidia" not in provider_types:
+        pytest.skip("datasetio=remote::nvidia provider not configured, skipping")
+
+
 # nvidia provider only
 @skip_in_github_actions
 @pytest.mark.parametrize(

View file

@ -0,0 +1,167 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"greet_everyone\",\n \"description\": \"\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"url\"],\n \"properties\": {\n \"url\": {\n \"type\": \"string\",\n \"description\": \"\"\n }\n }\n }\n },\n {\n \"name\": \"get_boiling_point\",\n \"description\": \"\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n \",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"liquid_name\"],\n \"properties\": {\n \"liquid_name\": {\n \"type\": \"string\",\n \"description\": \"\"\n },\n \"celsius\": {\n \"type\": \"boolean\",\n \"description\": \"\",\n \"default\": \"True\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[greet_everyone(url=\"world\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.663224Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "How",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.706706Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " can",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.751075Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " I",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.794187Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " assist",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.837831Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " you",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.879926Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " further",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.92182Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "?",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.963339Z",
"done": true,
"done_reason": "stop",
"total_duration": 492973041,
"load_duration": 103979375,
"prompt_eval_count": 482,
"prompt_eval_duration": 87032041,
"eval_count": 8,
"eval_duration": 300586375,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true
}
}
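
These recording fixtures all share one envelope: "request" captures the exact provider call, and "response.body" holds either a single typed object or, when "is_streaming" is true, a list of typed chunks, each wrapping the provider's response type under "__type__" with its fields under "__data__". A minimal sketch of replaying one of the Ollama /api/generate recordings above (the file path is hypothetical):

import json


def replay_generate_recording(path):
    """Return the recorded prompt and the reassembled generated text."""
    with open(path) as f:
        rec = json.load(f)
    prompt = rec["request"]["body"]["prompt"]
    body = rec["response"]["body"]
    chunks = body if rec["response"]["is_streaming"] else [body]
    # For ollama._types.GenerateResponse the generated text lives in "response".
    text = "".join(chunk["__data__"]["response"] for chunk in chunks)
    return prompt, text


prompt, text = replay_generate_recording("recordings/ollama_generate.json")
print(text)  # -> "How can I assist you further?"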

View file

@ -0,0 +1,31 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": [],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 0,
"total_tokens": 0,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@ -0,0 +1,89 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
},
{
"role": "user",
"content": "Please give me information about Michael Jordan."
}
],
"response_format": {
"type": "json_schema",
"json_schema": {
"name": "AnswerFormat",
"schema": {
"properties": {
"first_name": {
"title": "First Name",
"type": "string"
},
"last_name": {
"title": "Last Name",
"type": "string"
},
"year_of_birth": {
"title": "Year Of Birth",
"type": "integer"
}
},
"required": [
"first_name",
"last_name",
"year_of_birth"
],
"title": "AnswerFormat",
"type": "object"
}
}
},
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-433",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "{\"first_name\": \"Michael\", \"last_name\": \"Jordan\", \"year_of_birth\": 1963}\n\n \t\t\t\t\t\t\t\t\t\t\t \t\t ",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758979490,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 31,
"prompt_tokens": 60,
"total_tokens": 91,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,31 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": [],
"encoding_format": "base64"
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 0,
"total_tokens": 0,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,316 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "What's the name of the Sun in latin?"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": "The Latin",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " name",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " for",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " Sun",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " \"",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": "Sol",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": "\".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 11,
"prompt_tokens": 20,
"total_tokens": 31,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}
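
The streaming chat-completion recordings use the same envelope but store OpenAI ChatCompletionChunk objects, so reassembling the assistant message means concatenating the choice deltas. A short sketch against the recording above (hypothetical file path):

import json

with open("recordings/fireworks_chat_stream.json") as f:
    rec = json.load(f)

parts = []
for chunk in rec["response"]["body"]:
    delta = chunk["__data__"]["choices"][0]["delta"]
    # The first chunk carries only the role and the last only finish_reason,
    # so skip chunks with no text content.
    if delta["content"]:
        parts.append(delta["content"])

print("".join(parts))  # -> The Latin name for the Sun is "Sol".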

View file

@ -0,0 +1,44 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Hello, world!",
"logprobs": false,
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-74",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Hello! How can I assist you today?"
}
],
"created": 1758975636,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 10,
"prompt_tokens": 29,
"total_tokens": 39,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,92 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"stream": false,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-761",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "call_cj8ownwc",
"function": {
"arguments": "{\"location\":\"San Francisco, CA\"}",
"name": "get_weather"
},
"type": "function",
"index": 0
}
]
}
}
],
"created": 1758975113,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 18,
"prompt_tokens": 185,
"total_tokens": 203,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
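
When a recording captures a tool call, the OpenAI chat-completion schema delivers the function arguments as a JSON-encoded string, so a consumer needs one more json.loads before dispatching. A sketch against the recording above (hypothetical file path):

import json

with open("recordings/ollama_tool_call.json") as f:
    rec = json.load(f)

message = rec["response"]["body"]["__data__"]["choices"][0]["message"]
call = message["tool_calls"][0]
name = call["function"]["name"]                   # "get_weather"
args = json.loads(call["function"]["arguments"])  # {"location": "San Francisco, CA"}
print(name, args)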

View file

@ -0,0 +1,44 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Hello, world!",
"logprobs": true,
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-809",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Hello! It's nice to meet you. Is there anything I can help you with or would you like to chat?"
}
],
"created": 1758975633,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 25,
"prompt_tokens": 29,
"total_tokens": 54,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,550 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "What is the name of the US captial?"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": "The name",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " US",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " capital",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " Washington",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ",",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " D",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ".C",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " (",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": "short",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " for",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " District",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " Columbia",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ").",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 20,
"prompt_tokens": 20,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}

View file

@ -0,0 +1,60 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-123",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! As of my knowledge cutoff on December 15th, I have the latest information for you. However, please note that my data may not be entirely up-to-date.\n\nCurrently, and based on historical climate patterns, it appears to be a partly cloudy day with mild temperatures in San Francisco, CA. Expect a temperature range of around 48\u00b0F (9\u00b0C) to 54\u00b0F (12\u00b0C). It's likely to be a breezy day, with winds blowing at about 13 mph (21 km/h).\n\nHowever, if I were to look into more recent weather patterns or forecasts, I would recommend checking the latest conditions directly from reliable sources such as the National Weather Service or local news outlets for more accurate and up-to-date information.\n\nPlease let me know how I can further assist you.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758978071,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 163,
"prompt_tokens": 45,
"total_tokens": 208,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": "Test dimensions parameter",
"encoding_format": "float",
"dimensions": 16
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.9296875,
5.1875,
-2.140625,
0.171875,
-2.25,
-0.8359375,
-0.828125,
1.15625,
2.328125,
-1.0078125,
-3.0,
4.09375,
0.8359375,
0.1015625,
2.015625,
-1.0859375
],
"index": 0,
"object": "embedding",
"raw_output": null
}
],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,39 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": "Test dimensions parameter",
"encoding_format": "base64",
"dimensions": 16
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": "AABuvwAApkAAAAnAAAAwPgAAEMAAAFa/AABUvwAAlD8AABVAAACBvwAAQMAAAINAAABWPwAA0D0AAAFAAACLvw==",
"index": 0,
"object": "embedding",
"raw_output": null
}
],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}
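
With encoding_format="base64", OpenAI-compatible embedding servers pack the vector as little-endian float32 bytes, so the string above decodes to the same 16 values as the float-format recording. A small sketch (standard library only):

import base64
import struct

b64 = "AABuvwAApkAAAAnAAAAwPgAAEMAAAFa/AABUvwAAlD8AABVAAACBvwAAQMAAAINAAABWPwAA0D0AAAFAAACLvw=="
raw = base64.b64decode(b64)
vec = struct.unpack(f"<{len(raw) // 4}f", raw)
print(len(vec), vec[:3])  # -> 16 (-0.9296875, 5.1875, -2.140625)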

View file

@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "1d64ff81-b7c4-40c6-9509-cca71759da3e",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920401,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 26,
"prompt_tokens": 14,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,347 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"greet_everyone\",\n \"description\": \"\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"url\"],\n \"properties\": {\n \"url\": {\n \"type\": \"string\",\n \"description\": \"\"\n }\n }\n }\n },\n {\n \"name\": \"get_boiling_point\",\n \"description\": \"\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n \",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"liquid_name\"],\n \"properties\": {\n \"liquid_name\": {\n \"type\": \"string\",\n \"description\": \"\"\n },\n \"celsius\": {\n \"type\": \"boolean\",\n \"description\": \"\",\n \"default\": \"True\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[greet_everyone(url=\"world\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHow can I assist you further?<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the boiling point of polyjuice? Use tools to answer.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.177453Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.220271Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "get",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.261232Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_bo",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.302818Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "iling",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.344343Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_point",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.386025Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.42778Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "liquid",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.469673Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_name",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.512543Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "='",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.554479Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "poly",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.597092Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "ju",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.639581Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "ice",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.683223Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "',",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.72556Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " c",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.768012Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "elsius",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.8098Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=True",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.851578Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ")]",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:57.893693Z",
"done": true,
"done_reason": "stop",
"total_duration": 885274541,
"load_duration": 99578333,
"prompt_eval_count": 514,
"prompt_eval_duration": 67915875,
"eval_count": 18,
"eval_duration": 717086791,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true
}
}
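
These recordings serialize each provider response as a `__type__`/`__data__` pair so the replay layer can reconstruct the original typed object (here `ollama._types.GenerateResponse`). A minimal decoding sketch, assuming pydantic-v2-style response models; the helper name `rebuild` is illustrative, not the stack's actual API:

```python
import importlib
from typing import Any


def rebuild(obj: Any) -> Any:
    """Hypothetical decoder for the __type__/__data__ convention in these fixtures."""
    if isinstance(obj, dict) and "__type__" in obj and "__data__" in obj:
        module_name, _, class_name = obj["__type__"].rpartition(".")
        cls = getattr(importlib.import_module(module_name), class_name)
        # Both ollama._types and openai.types payloads are pydantic models,
        # so the stored dict can be validated back into a typed instance.
        return cls.model_validate(obj["__data__"])
    if isinstance(obj, list):
        return [rebuild(item) for item in obj]
    return obj
```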


@@ -0,0 +1,74 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
}
],
"stream": false,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the weather in a given city",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The city to get the weather for"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "0fe94e7d-f25b-4843-ba0a-e402e0764830",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "I can\u2019t help with that. If you're looking for current weather information, I recommend checking a weather website or app, such as AccuWeather or Weather.com. Is there anything else I can help you with?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920402,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 45,
"prompt_tokens": 27,
"total_tokens": 72,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large


@@ -0,0 +1,55 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test trace 0"
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-272",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "I'm happy to help you with a test. Since we are in the middle of a text-based conversation, I'll do my best to simulate a simple test tracing process.\n\n**Trace Test Results**\n\nTo perform this test, please follow these steps:\n\n1. Type \"test\" on command mode.\n2. Press Enter.\n\nNow, let's start tracing...\n\nTest Tracing Results:\nTest Case: General Functions\nTest Case Result: PASS\n\nSystem Response:\n\n```\n# System Boot Time: 2023-10-13T14:30:00\n# CPU Temperature: 35\u00b0C\n# Disk Space Available: 80%\n```\n\nNext Steps?\n\nType 'done' to exit the test, or 'run' for more tests.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758978134,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 152,
"prompt_tokens": 29,
"total_tokens": 181,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}


@@ -0,0 +1,44 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963",
"stop": "1963",
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-183",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Michael Jordan was born in the year of "
}
],
"created": 1758978053,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 11,
"prompt_tokens": 48,
"total_tokens": 59,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large


@@ -0,0 +1,112 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"stream": true,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-634",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_wubm4yax",
"function": {
"arguments": "{\"location\":\"San Francisco, CA\"}",
"name": "get_weather"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758975115,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-634",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 1758975115,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
}
}
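
In the streamed tool-call recording above, the entire call arrives in the first chunk's delta, and the second chunk only carries `finish_reason: "tool_calls"`. A consumer would pull the name and JSON-encoded arguments out of the delta roughly as follows; this is a sketch against the reconstructed chunk objects, not a fixed API of the test harness:

```python
import json


def first_tool_call(chunks):
    """Return (name, arguments) from the first streamed tool-call delta."""
    for chunk in chunks:
        tool_calls = chunk.choices[0].delta.tool_calls
        if tool_calls:
            call = tool_calls[0]
            return call.function.name, json.loads(call.function.arguments)
    return None


# For the recording above: ("get_weather", {"location": "San Francisco, CA"})
```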


@@ -0,0 +1,47 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963",
"stop": [
"blathering",
"1963"
],
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-381",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Michael Jordan was born in the year of "
}
],
"created": 1758978056,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 11,
"prompt_tokens": 48,
"total_tokens": 59,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
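
The two `/v1/completions` recordings above exercise the `stop` parameter as a bare string and as a list; in both cases generation halts before the stop sequence is emitted, so the text ends at "...the year of " and `finish_reason` is `stop`. Reproducing the list form against the same local endpoint might look like this (the client wiring and placeholder API key are assumptions, not part of the recording):

```python
from openai import OpenAI

# The fixtures point at a local Ollama server exposing the OpenAI-compatible API.
client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="not-needed")

completion = client.completions.create(
    model="llama3.2:3b-instruct-fp16",
    prompt="Return the exact same sentence and don't add additional words): "
    "Michael Jordan was born in the year of 1963",
    stop=["blathering", "1963"],  # generation stops before emitting either sequence
)
print(completion.choices[0].text)  # "Michael Jordan was born in the year of "
```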

File diff suppressed because it is too large


@@ -0,0 +1,55 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test trace 1"
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-122",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "It appears you're trying to initiate a conversation or test the functionality of this AI system. I'm happy to chat with you!\n\nWould you like to:\nA) Ask me a question on a specific topic\nB) Engage in a conversational dialogue on a topic of your choice\nC) Play a text-based game\nD) Test my language understanding capabilities\n\nPlease respond with the letter of your preferred activity.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758978142,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 85,
"prompt_tokens": 29,
"total_tokens": 114,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}


@@ -13,22 +13,23 @@
       "__data__": {
         "models": [
           {
-            "model": "llama3.2-vision:11b",
-            "name": "llama3.2-vision:11b",
-            "digest": "6f2f9757ae97e8a3f8ea33d6adb2b11d93d9a35bef277cd2c0b1b5af8e8d0b1e",
-            "expires_at": "2025-09-03T11:51:35.966409-07:00",
-            "size": 12401209008,
-            "size_vram": 12401209008,
+            "model": "llama3.2:3b",
+            "name": "llama3.2:3b",
+            "digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
+            "expires_at": "2025-09-27T11:54:56.718552-07:00",
+            "size": 3367856128,
+            "size_vram": 3367856128,
             "details": {
               "parent_model": "",
               "format": "gguf",
-              "family": "mllama",
+              "family": "llama",
               "families": [
-                "mllama"
+                "llama"
               ],
-              "parameter_size": "10.7B",
+              "parameter_size": "3.2B",
               "quantization_level": "Q4_K_M"
-            }
+            },
+            "context_length": 4096
           }
         ]
       }

File diff suppressed because it is too large


@@ -0,0 +1,43 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "1bbb8db5-63e5-40cd-8ffe-59e0e88bf8f0",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": "4. At the beginning of the year, a woman has $5,000"
}
],
"created": 1758920353,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 16,
"prompt_tokens": 25,
"total_tokens": 41,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large


@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Which planet has rings around it with a name starting with letter S?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "26632ea9-3481-419d-bc0d-83c177257bc4",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "There are two planets in our solar system with ring systems that have names starting with the letter S:\n\n1. **Saturn** - Its ring system is one of the most iconic and well-known in our solar system. The rings are made up of ice and rock particles that range in size from tiny dust grains to massive boulders.\n2. **Saturn's moon** - The ring system of **Saturn's moon, Rhea**, is sometimes referred to as a \"ring system\" even though it's much smaller and less prominent than Saturn's. However, it's worth noting that Rhea's ring system is not as well-known as Saturn's.\n\nIf you're looking for a planet with a ring system that starts with the letter S and is not a moon, then the answer is Saturn!",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920397,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 164,
"prompt_tokens": 24,
"total_tokens": 188,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}


@@ -0,0 +1,185 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"greet_everyone\",\n \"description\": \"\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"url\"],\n \"properties\": {\n \"url\": {\n \"type\": \"string\",\n \"description\": \"\"\n }\n }\n }\n },\n {\n \"name\": \"get_boiling_point\",\n \"description\": \"\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n \",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"liquid_name\"],\n \"properties\": {\n \"liquid_name\": {\n \"type\": \"string\",\n \"description\": \"\"\n },\n \"celsius\": {\n \"type\": \"boolean\",\n \"description\": \"\",\n \"default\": \"True\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.034121Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[g",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.07569Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "reet",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.116927Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_every",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.159755Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "one",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.201675Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(url",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.243056Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=\"",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.284651Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "world",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.326276Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\")]",
"thinking": null,
"context": null
}
},
{
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-09-27T18:05:56.367959Z",
"done": true,
"done_reason": "stop",
"total_duration": 5381441291,
"load_duration": 4112439791,
"prompt_eval_count": 459,
"prompt_eval_duration": 932587833,
"eval_count": 9,
"eval_duration": 334328250,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true
}
}


@@ -0,0 +1,706 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": "Hello!",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " It",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": "'s",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " nice",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " meet",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " Is",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " there",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " something",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " I",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " can",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " help",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " with",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": ",",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " or",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " would",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " like",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " chat",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": "?",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 26,
"prompt_tokens": 14,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}
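
A streamed chat recording stores every chunk in order; the final chunk carries `finish_reason: "stop"` plus usage, with `content` set to null. Reassembling the assistant message is just a concatenation of the non-empty deltas, as in this sketch:

```python
def join_stream(chunks) -> str:
    """Concatenate streamed delta content into the full assistant message."""
    parts = []
    for chunk in chunks:
        if chunk.choices and chunk.choices[0].delta.content:
            parts.append(chunk.choices[0].delta.content)
    return "".join(parts)


# For the recording above this yields:
# "Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?"
```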


@@ -0,0 +1,996 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
"max_tokens": 50,
"stream": true,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " a"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " __________________"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "_____"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "##"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Step"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " "
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "1"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ":"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Identify"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " flower"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " mentioned"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " in"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " sentence"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "The"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " sentence"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " mentions"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " \""
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "vio"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\"\n\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "##"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Step"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " "
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "2"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ":"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Determine"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " flower"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " v"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "io"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " are"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "V"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "io"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " are"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " a"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": ""
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 50,
"prompt_tokens": 25,
"total_tokens": 75,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}

File diff suppressed because it is too large


@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Which planet do humans live on?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "0fd60cd7-dc72-45b7-808c-4da91de80093",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Humans live on a planet called Earth.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920388,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 9,
"prompt_tokens": 17,
"total_tokens": 26,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}


@@ -0,0 +1,527 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-1-dev-fp8",
"created": 1729532889,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": false,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/tvergho-87e44d/models/debatecards-70b-ft-3epoch-dpo-v2",
"created": 1743381121,
"object": "model",
"owned_by": "tvergho-87e44d",
"kind": "HF_PEFT_ADDON",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-kontext-max",
"created": 1750714611,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-kontext-pro",
"created": 1750488264,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b",
"created": 1748467427,
"object": "model",
"owned_by": "sentientfoundation-serverless",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3",
"created": 1735576668,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new",
"created": 1739563474,
"object": "model",
"owned_by": "sentientfoundation",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/gpt-oss-120b",
"created": 1754345600,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
"created": 1753211090,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b-thinking-2507",
"created": 1753916446,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
"created": 1753124424,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b-thinking-2507",
"created": 1753455434,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-embedding-8b",
"created": 1755707090,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 40960
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3-0324",
"created": 1742827220,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3p1-terminus",
"created": 1758586241,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/kimi-k2-instruct",
"created": 1752259096,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/gpt-oss-20b",
"created": 1754345466,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"created": 1743878495,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": true,
"context_length": 1048576
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct",
"created": 1754063588,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"created": 1733442103,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct",
"created": 1743392739,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false,
"context_length": 128000
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b",
"created": 1745885249,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/glm-4p5-air",
"created": 1754089426,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1",
"created": 1737397673,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"created": 1721692808,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1-basic",
"created": 1742306746,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3p1",
"created": 1755758988,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-1-schnell-fp8",
"created": 1729535376,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": false,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/glm-4p5",
"created": 1753809636,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/kimi-k2-instruct-0905",
"created": 1757018994,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-405b-instruct",
"created": 1721428386,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama4-scout-instruct-basic",
"created": 1743878279,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": true,
"context_length": 1048576
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b",
"created": 1745878133,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-70b-instruct",
"created": 1721287357,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1-0528",
"created": 1748456377,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/mixtral-8x22b-instruct",
"created": 1713375508,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 65536
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507",
"created": 1753808388,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
}
],
"is_streaming": false
}
}
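
These recorded fixtures wrap every response object in a `__type__`/`__data__` envelope naming the client class that produced it. Below is a minimal sketch of how such an entry could be rehydrated, assuming the client types are pydantic v2 models (the `openai` package's types are); the loader function and the file name are illustrative, not part of llama_stack:

```python
import importlib
import json

def load_recorded_value(obj):
    """Rehydrate one __type__/__data__ envelope into its client object."""
    if isinstance(obj, dict) and "__type__" in obj and "__data__" in obj:
        module_path, _, class_name = obj["__type__"].rpartition(".")
        cls = getattr(importlib.import_module(module_path), class_name)
        # e.g. openai.types.model.Model is a pydantic model, so this validates
        return cls.model_validate(obj["__data__"])
    return obj

with open("models_recording.json") as f:  # hypothetical file name
    recording = json.load(f)

models = [load_recorded_value(item) for item in recording["response"]["body"]]
print(models[0].id, models[0].owned_by)
```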

View file

@ -0,0 +1,834 @@
{
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-0613",
"created": 1686588896,
"object": "model",
"owned_by": "openai"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4",
"created": 1687882411,
"object": "model",
"owned_by": "openai"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo",
"created": 1677610602,
"object": "model",
"owned_by": "openai"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-codex",
"created": 1757527818,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-audio-2025-08-28",
"created": 1756256146,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-realtime",
"created": 1756271701,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-realtime-2025-08-28",
"created": 1756271773,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-audio",
"created": 1756339249,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "davinci-002",
"created": 1692634301,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "babbage-002",
"created": 1692634615,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo-instruct",
"created": 1692901427,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo-instruct-0914",
"created": 1694122472,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "dall-e-3",
"created": 1698785189,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "dall-e-2",
"created": 1698798177,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-1106-preview",
"created": 1698957206,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo-1106",
"created": 1698959748,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "tts-1-hd",
"created": 1699046015,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "tts-1-1106",
"created": 1699053241,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "tts-1-hd-1106",
"created": 1699053533,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "text-embedding-3-small",
"created": 1705948997,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "text-embedding-3-large",
"created": 1705953180,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-0125-preview",
"created": 1706037612,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-turbo-preview",
"created": 1706037777,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo-0125",
"created": 1706048358,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-turbo",
"created": 1712361441,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-turbo-2024-04-09",
"created": 1712601677,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o",
"created": 1715367049,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-2024-05-13",
"created": 1715368132,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-2024-07-18",
"created": 1721172717,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini",
"created": 1721172741,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-2024-08-06",
"created": 1722814719,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "chatgpt-4o-latest",
"created": 1723515131,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1-mini-2024-09-12",
"created": 1725648979,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1-mini",
"created": 1725649008,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-realtime-preview-2024-10-01",
"created": 1727131766,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-audio-preview-2024-10-01",
"created": 1727389042,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-audio-preview",
"created": 1727460443,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-realtime-preview",
"created": 1727659998,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "omni-moderation-latest",
"created": 1731689265,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "omni-moderation-2024-09-26",
"created": 1732734466,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-realtime-preview-2024-12-17",
"created": 1733945430,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-audio-preview-2024-12-17",
"created": 1734034239,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-realtime-preview-2024-12-17",
"created": 1734112601,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-audio-preview-2024-12-17",
"created": 1734115920,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1-2024-12-17",
"created": 1734326976,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1",
"created": 1734375816,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-realtime-preview",
"created": 1734387380,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-audio-preview",
"created": 1734387424,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-mini",
"created": 1737146383,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-mini-2025-01-31",
"created": 1738010200,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-2024-11-20",
"created": 1739331543,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-search-preview-2025-03-11",
"created": 1741388170,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-search-preview",
"created": 1741388720,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-search-preview-2025-03-11",
"created": 1741390858,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-search-preview",
"created": 1741391161,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-transcribe",
"created": 1742068463,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-transcribe",
"created": 1742068596,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1-pro-2025-03-19",
"created": 1742251504,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1-pro",
"created": 1742251791,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-tts",
"created": 1742403959,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-2025-04-16",
"created": 1744133301,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o4-mini-2025-04-16",
"created": 1744133506,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3",
"created": 1744225308,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o4-mini",
"created": 1744225351,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1-2025-04-14",
"created": 1744315746,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1",
"created": 1744316542,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1-mini-2025-04-14",
"created": 1744317547,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1-mini",
"created": 1744318173,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1-nano-2025-04-14",
"created": 1744321025,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1-nano",
"created": 1744321707,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-image-1",
"created": 1745517030,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "codex-mini-latest",
"created": 1746673257,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-pro",
"created": 1748475349,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-realtime-preview-2025-06-03",
"created": 1748907838,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-audio-preview-2025-06-03",
"created": 1748908498,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-pro-2025-06-10",
"created": 1749166761,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o4-mini-deep-research",
"created": 1749685485,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-deep-research",
"created": 1749840121,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-deep-research-2025-06-26",
"created": 1750865219,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o4-mini-deep-research-2025-06-26",
"created": 1750866121,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-chat-latest",
"created": 1754073306,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-2025-08-07",
"created": 1754075360,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5",
"created": 1754425777,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-mini-2025-08-07",
"created": 1754425867,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-mini",
"created": 1754425928,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-nano-2025-08-07",
"created": 1754426303,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-nano",
"created": 1754426384,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo-16k",
"created": 1683758102,
"object": "model",
"owned_by": "openai-internal"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "tts-1",
"created": 1681940951,
"object": "model",
"owned_by": "openai-internal"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "whisper-1",
"created": 1677532384,
"object": "model",
"owned_by": "openai-internal"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "text-embedding-ada-002",
"created": 1671217299,
"object": "model",
"owned_by": "openai-internal"
}
}
],
"is_streaming": false
}
}

View file

@ -0,0 +1,96 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "nomic-embed-text:latest",
"created": 1756922046,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:l6-v2",
"created": 1756919946,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2-vision:11b",
"created": 1753926302,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2-vision:latest",
"created": 1753845527,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama-guard3:1b",
"created": 1753479584,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:1b",
"created": 1752814944,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:latest",
"created": 1748994610,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b",
"created": 1746123323,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b-instruct-fp16",
"created": 1746052428,
"object": "model",
"owned_by": "library"
}
}
],
"is_streaming": false
}
}

View file

@ -127,9 +127,8 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
        name="fireworks",
        description="Fireworks provider with a text model",
        defaults={
-            "text_model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
-            "vision_model": "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
-            "embedding_model": "nomic-ai/nomic-embed-text-v1.5",
+            "text_model": "fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct",
+            "embedding_model": "fireworks/accounts/fireworks/models/qwen3-embedding-8b",
        },
    ),
}

View file

@ -32,8 +32,8 @@ def setup_telemetry_data(llama_stack_client, text_model_id):
    )

    for i in range(2):
-        llama_stack_client.inference.chat_completion(
-            model_id=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
+        llama_stack_client.chat.completions.create(
+            model=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
        )

    start_time = time.time()
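
This hunk is part of the migration off the deprecated `inference.chat_completion` surface. For context, a minimal sketch of the OpenAI-compatible call it switches to, with response access following the standard OpenAI shape (the model id here is illustrative):

```python
response = llama_stack_client.chat.completions.create(
    model="llama3.2:3b",  # illustrative model id
    messages=[{"role": "user", "content": "Test trace 0"}],
)
print(response.choices[0].message.content)  # standard OpenAI response shape
```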

View file

@ -83,13 +83,20 @@
      ],
      "tools": [
        {
-          "tool_name": "get_weather",
+          "type": "function",
+          "function": {
+            "name": "get_weather",
            "description": "Get the current weather",
            "parameters": {
+              "type": "object",
+              "properties": {
                "location": {
-                  "param_type": "string",
+                  "type": "string",
                  "description": "The city and state (both required), e.g. San Francisco, CA."
                }
+              },
+              "required": ["location"]
+            }
          }
        }
      ],
@ -116,13 +123,20 @@
      ],
      "tools": [
        {
-          "tool_name": "get_weather",
+          "type": "function",
+          "function": {
+            "name": "get_weather",
            "description": "Get the current weather",
            "parameters": {
+              "type": "object",
+              "properties": {
                "location": {
-                  "param_type": "string",
+                  "type": "string",
                  "description": "The city and state (both required), e.g. San Francisco, CA."
                }
+              },
+              "required": ["location"]
+            }
          }
        }
      ],
@ -162,13 +176,20 @@
      ],
      "tools": [
        {
-          "tool_name": "get_weather",
+          "type": "function",
+          "function": {
+            "name": "get_weather",
            "description": "Get the current weather",
            "parameters": {
+              "type": "object",
+              "properties": {
                "location": {
-                  "param_type": "string",
+                  "type": "string",
                  "description": "The city and state (both required), e.g. San Francisco, CA."
                }
+              },
+              "required": ["location"]
+            }
          }
        }
      ],
@ -192,66 +213,6 @@
      ]
    }
  },
-  "array_parameter": {
-    "data": {
-      "messages": [
-        [
-          {
-            "role": "user",
-            "content": "Please add a new product with name 'Widget', price 19.99, in stock, and tags ['new', 'sale'] and give me the product id."
-          }
-        ]
-      ],
-      "tools": [
-        {
-          "tool_name": "addProduct",
-          "description": "Get the current weather",
-          "parameters": {
-            "name": {
-              "param_type": "string",
-              "description": "Name of the product"
-            },
-            "price": {
-              "param_type": "number",
-              "description": "Price of the product"
-            },
-            "inStock": {
-              "param_type": "boolean",
-              "description": "Availability status of the product."
-            },
-            "tags": {
-              "param_type": "list[str]",
-              "description": "List of product tags"
-            }
-          }
-        }
-      ],
-      "tool_responses": [
-        {
-          "response": "{'response': 'Successfully added product with id: 123'}"
-        }
-      ],
-      "expected": [
-        {
-          "num_tool_calls": 1,
-          "tool_name": "addProduct",
-          "tool_arguments": {
-            "name": "Widget",
-            "price": 19.99,
-            "inStock": true,
-            "tags": [
-              "new",
-              "sale"
-            ]
-          }
-        },
-        {
-          "num_tool_calls": 0,
-          "answer": "123"
-        }
-      ]
-    }
-  },
  "sample_messages_tool_calling": {
    "data": {
      "messages": [
@ -270,13 +231,19 @@
      ],
      "tools": [
        {
-          "tool_name": "get_weather",
+          "type": "function",
+          "function": {
+            "name": "get_weather",
            "description": "Get the current weather",
            "parameters": {
+              "type": "object",
+              "properties": {
                "location": {
-                  "param_type": "string",
-                  "description": "The city and state, e.g. San Francisco, CA",
-                  "required": true
+                  "type": "string",
+                  "description": "The city and state (both required), e.g. San Francisco, CA."
                }
+              },
+              "required": ["location"]
+            }
          }
        }
@ -343,18 +310,23 @@
      ],
      "tools": [
        {
-          "tool_name": "get_object_namespace_list",
+          "type": "function",
+          "function": {
+            "name": "get_object_namespace_list",
            "description": "Get the list of objects in a namespace",
            "parameters": {
+              "type": "object",
+              "properties": {
                "kind": {
-                  "param_type": "string",
-                  "description": "the type of object",
-                  "required": true
+                  "type": "string",
+                  "description": "the type of object"
                },
                "namespace": {
-                  "param_type": "string",
-                  "description": "the name of the namespace",
-                  "required": true
+                  "type": "string",
+                  "description": "the name of the namespace"
                }
+              },
+              "required": ["kind", "namespace"]
+            }
          }
        }
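
After this change, every fixture declares its tools in the standard OpenAI function-calling schema. For reference, the complete converted shape of the `get_weather` tool, assembled from the hunks above:

```python
get_weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state (both required), e.g. San Francisco, CA.",
                }
            },
            "required": ["location"],
        },
    },
}
```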

View file

@ -31,6 +31,11 @@ def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server):
    uri = mcp_server["server_url"]

    # registering should not raise an error anymore even if you don't specify the auth token
+    try:
+        llama_stack_client.toolgroups.unregister(toolgroup_id=test_toolgroup_id)
+    except Exception:
+        pass
+
    llama_stack_client.toolgroups.register(
        toolgroup_id=test_toolgroup_id,
        provider_id="model-context-protocol",

View file

@ -107,14 +107,34 @@ async def test_get_raw_document_text_deprecated_text_yaml_with_text_content_item
        assert "text/yaml" in str(w[0].message)


+async def test_get_raw_document_text_supports_json_mime_type():
+    """Test that the function accepts application/json mime type."""
+    json_content = '{"name": "test", "version": "1.0", "items": ["item1", "item2"]}'
+
+    document = Document(content=json_content, mime_type="application/json")
+    result = await get_raw_document_text(document)
+    assert result == json_content
+
+
+async def test_get_raw_document_text_with_json_text_content_item():
+    """Test that the function handles JSON TextContentItem correctly."""
+    json_content = '{"key": "value", "nested": {"array": [1, 2, 3]}}'
+
+    document = Document(content=TextContentItem(text=json_content), mime_type="application/json")
+    result = await get_raw_document_text(document)
+    assert result == json_content
+
+
async def test_get_raw_document_text_rejects_unsupported_mime_types():
    """Test that the function rejects unsupported mime types."""
    document = Document(
        content="Some content",
-        mime_type="application/json",  # Not supported
+        mime_type="application/pdf",  # Not supported
    )

-    with pytest.raises(ValueError, match="Unexpected document mime type: application/json"):
+    with pytest.raises(ValueError, match="Unexpected document mime type: application/pdf"):
        await get_raw_document_text(document)
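
Taken together, these tests pin down the mime-type gate: `application/json` is now accepted alongside the text types, while binary types such as `application/pdf` are rejected. A hypothetical sketch of that gate follows; the names and the exact accepted set are illustrative, not the real llama_stack implementation:

```python
# Hypothetical sketch only; the actual implementation and full set of
# accepted mime types live in llama_stack and may differ.
ACCEPTED_MIME_TYPES = {"text/plain", "text/yaml", "application/json"}

async def get_raw_document_text_sketch(document) -> str:
    if document.mime_type not in ACCEPTED_MIME_TYPES:
        raise ValueError(f"Unexpected document mime type: {document.mime_type}")
    content = document.content
    # Document.content may be a plain string or a TextContentItem
    return content.text if hasattr(content, "text") else content
```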

View file

@ -16,9 +16,11 @@ from llama_stack.apis.agents import (
)
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.inference import Inference
+from llama_stack.apis.resource import ResourceType
from llama_stack.apis.safety import Safety
-from llama_stack.apis.tools import ToolGroups, ToolRuntime
+from llama_stack.apis.tools import ListToolsResponse, Tool, ToolGroups, ToolParameter, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
+from llama_stack.providers.inline.agents.meta_reference.agent_instance import ChatAgent
from llama_stack.providers.inline.agents.meta_reference.agents import MetaReferenceAgentsImpl
from llama_stack.providers.inline.agents.meta_reference.config import MetaReferenceAgentsImplConfig
from llama_stack.providers.inline.agents.meta_reference.persistence import AgentInfo
@ -75,11 +77,11 @@ def sample_agent_config():
        },
        input_shields=["string"],
        output_shields=["string"],
-        toolgroups=["string"],
+        toolgroups=["mcp::my_mcp_server"],
        client_tools=[
            {
-                "name": "string",
-                "description": "string",
+                "name": "client_tool",
+                "description": "Client Tool",
                "parameters": [
                    {
                        "name": "string",
@ -226,3 +228,83 @@ async def test_delete_agent(agents_impl, sample_agent_config):
    # Verify the agent was deleted
    with pytest.raises(ValueError):
        await agents_impl.get_agent(agent_id)
+
+
+async def test__initialize_tools(agents_impl, sample_agent_config):
+    # Mock tool_groups_api.list_tools()
+    agents_impl.tool_groups_api.list_tools.return_value = ListToolsResponse(
+        data=[
+            Tool(
+                identifier="story_maker",
+                provider_id="model-context-protocol",
+                type=ResourceType.tool,
+                toolgroup_id="mcp::my_mcp_server",
+                description="Make a story",
+                parameters=[
+                    ToolParameter(
+                        name="story_title",
+                        parameter_type="string",
+                        description="Title of the story",
+                        required=True,
+                        title="Story Title",
+                    ),
+                    ToolParameter(
+                        name="input_words",
+                        parameter_type="array",
+                        description="Input words",
+                        required=False,
+                        items={"type": "string"},
+                        title="Input Words",
+                        default=[],
+                    ),
+                ],
+            )
+        ]
+    )
+
+    create_response = await agents_impl.create_agent(sample_agent_config)
+    agent_id = create_response.agent_id
+
+    # Get an instance of ChatAgent
+    chat_agent = await agents_impl._get_agent_impl(agent_id)
+    assert chat_agent is not None
+    assert isinstance(chat_agent, ChatAgent)
+
+    # Initialize tool definitions
+    await chat_agent._initialize_tools()
+    assert len(chat_agent.tool_defs) == 2
+
+    # Verify the first tool, which is a client tool
+    first_tool = chat_agent.tool_defs[0]
+    assert first_tool.tool_name == "client_tool"
+    assert first_tool.description == "Client Tool"
+
+    # Verify the second tool, which is an MCP tool that has an array-type property
+    second_tool = chat_agent.tool_defs[1]
+    assert second_tool.tool_name == "story_maker"
+    assert second_tool.description == "Make a story"
+    parameters = second_tool.parameters
+    assert len(parameters) == 2
+
+    # Verify a string property
+    story_title = parameters.get("story_title")
+    assert story_title is not None
+    assert story_title.param_type == "string"
+    assert story_title.description == "Title of the story"
+    assert story_title.required
+    assert story_title.items is None
+    assert story_title.title == "Story Title"
+    assert story_title.default is None
+
+    # Verify an array property
+    input_words = parameters.get("input_words")
+    assert input_words is not None
+    assert input_words.param_type == "array"
+    assert input_words.description == "Input words"
+    assert not input_words.required
+    assert input_words.items is not None
+    assert len(input_words.items) == 1
+    assert input_words.items.get("type") == "string"
+    assert input_words.title == "Input Words"
+    assert input_words.default == []

View file

@ -3,5 +3,3 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-
-from .batch_inference import *

View file

@ -0,0 +1,147 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Unit tests for MCP tool parameter conversion in streaming responses.
This tests the fix for handling array-type parameters with 'items' field
when converting MCP tool definitions to OpenAI format.
"""
from llama_stack.apis.tools import ToolDef, ToolParameter
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
def test_mcp_tool_conversion_with_array_items():
"""
Test that MCP tool parameters with array type and items field are properly converted.
This is a regression test for the bug where array parameters without 'items'
caused OpenAI API validation errors like:
"Invalid schema for function 'pods_exec': In context=('properties', 'command'),
array schema missing items."
"""
# Create a tool parameter with array type and items specification
# This mimics what kubernetes-mcp-server's pods_exec tool has
tool_param = ToolParameter(
name="command",
parameter_type="array",
description="Command to execute in the pod",
required=True,
items={"type": "string"}, # This is the crucial field
)
# Convert to ToolDefinition format (as done in streaming.py)
tool_def = ToolDefinition(
tool_name="test_tool",
description="Test tool with array parameter",
parameters={
"command": ToolParamDefinition(
param_type=tool_param.parameter_type,
description=tool_param.description,
required=tool_param.required,
default=tool_param.default,
items=tool_param.items, # The fix: ensure items is passed through
)
},
)
# Convert to OpenAI format
openai_tool = convert_tooldef_to_openai_tool(tool_def)
# Verify the conversion includes the items field
assert openai_tool["type"] == "function"
assert openai_tool["function"]["name"] == "test_tool"
assert "parameters" in openai_tool["function"]
parameters = openai_tool["function"]["parameters"]
assert "properties" in parameters
assert "command" in parameters["properties"]
command_param = parameters["properties"]["command"]
assert command_param["type"] == "array"
assert "items" in command_param, "Array parameter must have 'items' field for OpenAI API"
assert command_param["items"] == {"type": "string"}
def test_mcp_tool_conversion_without_array():
"""Test that non-array parameters work correctly without items field."""
tool_param = ToolParameter(
name="name",
parameter_type="string",
description="Name parameter",
required=True,
)
tool_def = ToolDefinition(
tool_name="test_tool",
description="Test tool with string parameter",
parameters={
"name": ToolParamDefinition(
param_type=tool_param.parameter_type,
description=tool_param.description,
required=tool_param.required,
items=tool_param.items, # Will be None for non-array types
)
},
)
openai_tool = convert_tooldef_to_openai_tool(tool_def)
# Verify basic structure
assert openai_tool["type"] == "function"
parameters = openai_tool["function"]["parameters"]
assert "name" in parameters["properties"]
name_param = parameters["properties"]["name"]
assert name_param["type"] == "string"
# items should not be present for non-array types
assert "items" not in name_param or name_param.get("items") is None
def test_mcp_tool_conversion_complex_array_items():
"""Test array parameter with complex items schema (object type)."""
tool_param = ToolParameter(
name="configs",
parameter_type="array",
description="Array of configuration objects",
required=False,
items={
"type": "object",
"properties": {
"key": {"type": "string"},
"value": {"type": "string"},
},
"required": ["key"],
},
)
tool_def = ToolDefinition(
tool_name="test_tool",
description="Test tool with complex array parameter",
parameters={
"configs": ToolParamDefinition(
param_type=tool_param.parameter_type,
description=tool_param.description,
required=tool_param.required,
items=tool_param.items,
)
},
)
openai_tool = convert_tooldef_to_openai_tool(tool_def)
# Verify complex items schema is preserved
parameters = openai_tool["function"]["parameters"]
configs_param = parameters["properties"]["configs"]
assert configs_param["type"] == "array"
assert "items" in configs_param
assert configs_param["items"]["type"] == "object"
assert "properties" in configs_param["items"]
assert "key" in configs_param["items"]["properties"]
assert "value" in configs_param["items"]["properties"]

View file

@ -4,11 +4,11 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from unittest.mock import MagicMock, PropertyMock, patch
+from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch

import pytest

-from llama_stack.apis.inference import Model
+from llama_stack.apis.inference import Model, OpenAIUserMessageParam
from llama_stack.apis.models import ModelType
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
@ -43,8 +43,17 @@ class OpenAIMixinWithEmbeddingsImpl(OpenAIMixin):
@pytest.fixture
def mixin():
-    """Create a test instance of OpenAIMixin"""
-    return OpenAIMixinImpl()
+    """Create a test instance of OpenAIMixin with mocked model_store"""
+    mixin_instance = OpenAIMixinImpl()
+
+    # just enough to satisfy _get_provider_model_id calls
+    mock_model_store = MagicMock()
+    mock_model = MagicMock()
+    mock_model.provider_resource_id = "test-provider-resource-id"
+    mock_model_store.get_model = AsyncMock(return_value=mock_model)
+    mixin_instance.model_store = mock_model_store
+
+    return mixin_instance


@pytest.fixture
@ -205,6 +214,74 @@ class TestOpenAIMixinCacheBehavior:
        assert "final-mock-model-id" in mixin._model_cache


+class TestOpenAIMixinImagePreprocessing:
+    """Test cases for image preprocessing functionality"""
+
+    async def test_openai_chat_completion_with_image_preprocessing_enabled(self, mixin):
+        """Test that image URLs are converted to base64 when download_images is True"""
+        mixin.download_images = True
+
+        message = OpenAIUserMessageParam(
+            role="user",
+            content=[
+                {"type": "text", "text": "What's in this image?"},
+                {"type": "image_url", "image_url": {"url": "http://example.com/image.jpg"}},
+            ],
+        )
+
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
+
+        with patch.object(type(mixin), "client", new_callable=PropertyMock, return_value=mock_client):
+            with patch("llama_stack.providers.utils.inference.openai_mixin.localize_image_content") as mock_localize:
+                mock_localize.return_value = (b"fake_image_data", "jpeg")
+
+                await mixin.openai_chat_completion(model="test-model", messages=[message])
+
+        mock_localize.assert_called_once_with("http://example.com/image.jpg")
+
+        mock_client.chat.completions.create.assert_called_once()
+        call_args = mock_client.chat.completions.create.call_args
+        processed_messages = call_args[1]["messages"]
+        assert len(processed_messages) == 1
+        content = processed_messages[0]["content"]
+        assert len(content) == 2
+        assert content[0]["type"] == "text"
+        assert content[1]["type"] == "image_url"
+        assert content[1]["image_url"]["url"] == "data:image/jpeg;base64,ZmFrZV9pbWFnZV9kYXRh"
+
+    async def test_openai_chat_completion_with_image_preprocessing_disabled(self, mixin):
+        """Test that image URLs are not modified when download_images is False"""
+        mixin.download_images = False  # explicitly set to False
+
+        message = OpenAIUserMessageParam(
+            role="user",
+            content=[
+                {"type": "text", "text": "What's in this image?"},
+                {"type": "image_url", "image_url": {"url": "http://example.com/image.jpg"}},
+            ],
+        )
+
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
+
+        with patch.object(type(mixin), "client", new_callable=PropertyMock, return_value=mock_client):
+            with patch("llama_stack.providers.utils.inference.openai_mixin.localize_image_content") as mock_localize:
+                await mixin.openai_chat_completion(model="test-model", messages=[message])
+
+                mock_localize.assert_not_called()
+
+        mock_client.chat.completions.create.assert_called_once()
+        call_args = mock_client.chat.completions.create.call_args
+        processed_messages = call_args[1]["messages"]
+        assert len(processed_messages) == 1
+        content = processed_messages[0]["content"]
+        assert len(content) == 2
+        assert content[1]["image_url"]["url"] == "http://example.com/image.jpg"

class TestOpenAIMixinEmbeddingModelMetadata:
    """Test cases for embedding_model_metadata attribute functionality"""
"""Test cases for embedding_model_metadata attribute functionality""" """Test cases for embedding_model_metadata attribute functionality"""