mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-03 19:57:35 +00:00
Merge b1cbfe99f9
into sapling-pr-archive-ehhuang
This commit is contained in:
commit
91898e6598
81 changed files with 51742 additions and 2402 deletions
2
.github/workflows/conformance.yml
vendored
2
.github/workflows/conformance.yml
vendored
|
@ -43,7 +43,7 @@ jobs:
|
|||
# Cache oasdiff to avoid checksum failures and speed up builds
|
||||
- name: Cache oasdiff
|
||||
id: cache-oasdiff
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809
|
||||
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830
|
||||
with:
|
||||
path: ~/oasdiff
|
||||
key: oasdiff-${{ runner.os }}
|
||||
|
|
|
@ -4,6 +4,8 @@ include llama_stack/models/llama/llama4/tokenizer.model
|
|||
include llama_stack/core/*.sh
|
||||
include llama_stack/cli/scripts/*.sh
|
||||
include llama_stack/distributions/*/*.yaml
|
||||
include llama_stack/providers/tests/test_cases/inference/*.json
|
||||
exclude llama_stack/distributions/ci-tests
|
||||
include tests/integration/test_cases/inference/*.json
|
||||
include llama_stack/models/llama/*/*.md
|
||||
include llama_stack/tests/integration/*.jpg
|
||||
prune llama_stack/distributions/ci-tests
|
||||
|
|
|
@ -139,18 +139,7 @@ Methods:
|
|||
- <code title="post /v1/agents/{agent_id}/session/{session_id}/turn">client.agents.turn.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/agents/turn.py">create</a>(session_id, \*, agent_id, \*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/agents/turn_create_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/agents/turn_create_response.py">TurnCreateResponse</a></code>
|
||||
- <code title="get /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}">client.agents.turn.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/agents/turn.py">retrieve</a>(turn_id, \*, agent_id, session_id) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/agents/turn.py">Turn</a></code>
|
||||
|
||||
## BatchInference
|
||||
|
||||
Types:
|
||||
|
||||
```python
|
||||
from llama_stack_client.types import BatchInferenceChatCompletionResponse
|
||||
```
|
||||
|
||||
Methods:
|
||||
|
||||
- <code title="post /v1/batch-inference/chat-completion">client.batch_inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/batch_inference.py">chat_completion</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/batch_inference_chat_completion_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/batch_inference_chat_completion_response.py">BatchInferenceChatCompletionResponse</a></code>
|
||||
- <code title="post /v1/batch-inference/completion">client.batch_inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/batch_inference.py">completion</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/batch_inference_completion_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/shared/batch_completion.py">BatchCompletion</a></code>
|
||||
|
||||
## Datasets
|
||||
|
||||
|
|
|
@ -548,7 +548,6 @@ class Generator:
|
|||
if op.defining_class.__name__ in [
|
||||
"SyntheticDataGeneration",
|
||||
"PostTraining",
|
||||
"BatchInference",
|
||||
]:
|
||||
op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)"
|
||||
print(op.defining_class.__name__)
|
||||
|
|
526
docs/static/llama-stack-spec.html
vendored
526
docs/static/llama-stack-spec.html
vendored
|
@ -87,94 +87,6 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/inference/batch-chat-completion": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A BatchChatCompletionResponse with the full completions.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/BatchChatCompletionResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate chat completions for a batch of messages using the specified model.",
|
||||
"description": "Generate chat completions for a batch of messages using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/BatchChatCompletionRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/inference/batch-completion": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A BatchCompletionResponse with the full completions.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/BatchCompletionResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate completions for a batch of content using the specified model.",
|
||||
"description": "Generate completions for a batch of content using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/BatchCompletionRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1alpha/post-training/job/cancel": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
@ -281,7 +193,7 @@
|
|||
}
|
||||
},
|
||||
"tags": [
|
||||
"BatchInference (Coming Soon)"
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate a chat completion for the given messages using the specified model.",
|
||||
"description": "Generate a chat completion for the given messages using the specified model.",
|
||||
|
@ -298,55 +210,6 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/inference/completion": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "If stream=False, returns a CompletionResponse with the full completion. If stream=True, returns an SSE event stream of CompletionResponseStreamChunk.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/CompletionResponse"
|
||||
}
|
||||
},
|
||||
"text/event-stream": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/CompletionResponseStreamChunk"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"BatchInference (Coming Soon)"
|
||||
],
|
||||
"summary": "Generate a completion for the given content using the specified model.",
|
||||
"description": "Generate a completion for the given content using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/CompletionRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/agents": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -6346,6 +6209,20 @@
|
|||
],
|
||||
"title": "AppendRowsRequest"
|
||||
},
|
||||
"CancelTrainingJobRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"job_uuid": {
|
||||
"type": "string",
|
||||
"description": "The UUID of the job to cancel."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"job_uuid"
|
||||
],
|
||||
"title": "CancelTrainingJobRequest"
|
||||
},
|
||||
"CompletionMessage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -6906,6 +6783,31 @@
|
|||
"type": "boolean",
|
||||
"default": true
|
||||
},
|
||||
"items": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
},
|
||||
"title": {
|
||||
"type": "string"
|
||||
},
|
||||
"default": {
|
||||
"oneOf": [
|
||||
{
|
||||
|
@ -7051,26 +6953,23 @@
|
|||
"title": "UserMessage",
|
||||
"description": "A message from the user in a chat conversation."
|
||||
},
|
||||
"BatchChatCompletionRequest": {
|
||||
"ChatCompletionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model_id": {
|
||||
"type": "string",
|
||||
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
|
||||
},
|
||||
"messages_batch": {
|
||||
"messages": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/Message"
|
||||
}
|
||||
"$ref": "#/components/schemas/Message"
|
||||
},
|
||||
"description": "The messages to generate completions for."
|
||||
"description": "List of messages in the conversation."
|
||||
},
|
||||
"sampling_params": {
|
||||
"$ref": "#/components/schemas/SamplingParams",
|
||||
"description": "(Optional) Parameters to control the sampling strategy."
|
||||
"description": "Parameters to control the sampling strategy."
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
|
@ -7079,13 +6978,31 @@
|
|||
},
|
||||
"description": "(Optional) List of tool definitions available to the model."
|
||||
},
|
||||
"tool_config": {
|
||||
"$ref": "#/components/schemas/ToolConfig",
|
||||
"description": "(Optional) Configuration for tool use."
|
||||
"tool_choice": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"auto",
|
||||
"required",
|
||||
"none"
|
||||
],
|
||||
"description": "(Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead."
|
||||
},
|
||||
"tool_prompt_format": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"json",
|
||||
"function_tag",
|
||||
"python_list"
|
||||
],
|
||||
"description": "(Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead."
|
||||
},
|
||||
"response_format": {
|
||||
"$ref": "#/components/schemas/ResponseFormat",
|
||||
"description": "(Optional) Grammar specification for guided (structured) decoding."
|
||||
"description": "(Optional) Grammar specification for guided (structured) decoding. There are two options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it."
|
||||
},
|
||||
"stream": {
|
||||
"type": "boolean",
|
||||
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
|
||||
},
|
||||
"logprobs": {
|
||||
"type": "object",
|
||||
|
@ -7098,32 +7015,18 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"description": "(Optional) If specified, log probabilities for each token position will be returned."
|
||||
},
|
||||
"tool_config": {
|
||||
"$ref": "#/components/schemas/ToolConfig",
|
||||
"description": "(Optional) Configuration for tool use."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"model_id",
|
||||
"messages_batch"
|
||||
"messages"
|
||||
],
|
||||
"title": "BatchChatCompletionRequest"
|
||||
},
|
||||
"BatchChatCompletionResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"batch": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/ChatCompletionResponse"
|
||||
},
|
||||
"description": "List of chat completion responses, one for each conversation in the batch"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"batch"
|
||||
],
|
||||
"title": "BatchChatCompletionResponse",
|
||||
"description": "Response from a batch chat completion request."
|
||||
"title": "ChatCompletionRequest"
|
||||
},
|
||||
"ChatCompletionResponse": {
|
||||
"type": "object",
|
||||
|
@ -7203,194 +7106,6 @@
|
|||
"title": "TokenLogProbs",
|
||||
"description": "Log probabilities for generated tokens."
|
||||
},
|
||||
"BatchCompletionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model_id": {
|
||||
"type": "string",
|
||||
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
|
||||
},
|
||||
"content_batch": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/InterleavedContent"
|
||||
},
|
||||
"description": "The content to generate completions for."
|
||||
},
|
||||
"sampling_params": {
|
||||
"$ref": "#/components/schemas/SamplingParams",
|
||||
"description": "(Optional) Parameters to control the sampling strategy."
|
||||
},
|
||||
"response_format": {
|
||||
"$ref": "#/components/schemas/ResponseFormat",
|
||||
"description": "(Optional) Grammar specification for guided (structured) decoding."
|
||||
},
|
||||
"logprobs": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"top_k": {
|
||||
"type": "integer",
|
||||
"default": 0,
|
||||
"description": "How many tokens (for each position) to return log probabilities for."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "(Optional) If specified, log probabilities for each token position will be returned."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"model_id",
|
||||
"content_batch"
|
||||
],
|
||||
"title": "BatchCompletionRequest"
|
||||
},
|
||||
"BatchCompletionResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"batch": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/CompletionResponse"
|
||||
},
|
||||
"description": "List of completion responses, one for each input in the batch"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"batch"
|
||||
],
|
||||
"title": "BatchCompletionResponse",
|
||||
"description": "Response from a batch completion request."
|
||||
},
|
||||
"CompletionResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"metrics": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/MetricInResponse"
|
||||
},
|
||||
"description": "(Optional) List of metrics associated with the API response"
|
||||
},
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "The generated completion text"
|
||||
},
|
||||
"stop_reason": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"end_of_turn",
|
||||
"end_of_message",
|
||||
"out_of_tokens"
|
||||
],
|
||||
"description": "Reason why generation stopped"
|
||||
},
|
||||
"logprobs": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/TokenLogProbs"
|
||||
},
|
||||
"description": "Optional log probabilities for generated tokens"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"content",
|
||||
"stop_reason"
|
||||
],
|
||||
"title": "CompletionResponse",
|
||||
"description": "Response from a completion request."
|
||||
},
|
||||
"CancelTrainingJobRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"job_uuid": {
|
||||
"type": "string",
|
||||
"description": "The UUID of the job to cancel."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"job_uuid"
|
||||
],
|
||||
"title": "CancelTrainingJobRequest"
|
||||
},
|
||||
"ChatCompletionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model_id": {
|
||||
"type": "string",
|
||||
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
|
||||
},
|
||||
"messages": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/Message"
|
||||
},
|
||||
"description": "List of messages in the conversation."
|
||||
},
|
||||
"sampling_params": {
|
||||
"$ref": "#/components/schemas/SamplingParams",
|
||||
"description": "Parameters to control the sampling strategy."
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/ToolDefinition"
|
||||
},
|
||||
"description": "(Optional) List of tool definitions available to the model."
|
||||
},
|
||||
"tool_choice": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"auto",
|
||||
"required",
|
||||
"none"
|
||||
],
|
||||
"description": "(Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead."
|
||||
},
|
||||
"tool_prompt_format": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"json",
|
||||
"function_tag",
|
||||
"python_list"
|
||||
],
|
||||
"description": "(Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead."
|
||||
},
|
||||
"response_format": {
|
||||
"$ref": "#/components/schemas/ResponseFormat",
|
||||
"description": "(Optional) Grammar specification for guided (structured) decoding. There are two options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it."
|
||||
},
|
||||
"stream": {
|
||||
"type": "boolean",
|
||||
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
|
||||
},
|
||||
"logprobs": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"top_k": {
|
||||
"type": "integer",
|
||||
"default": 0,
|
||||
"description": "How many tokens (for each position) to return log probabilities for."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "(Optional) If specified, log probabilities for each token position will be returned."
|
||||
},
|
||||
"tool_config": {
|
||||
"$ref": "#/components/schemas/ToolConfig",
|
||||
"description": "(Optional) Configuration for tool use."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"model_id",
|
||||
"messages"
|
||||
],
|
||||
"title": "ChatCompletionRequest"
|
||||
},
|
||||
"ChatCompletionResponseEvent": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -7560,87 +7275,6 @@
|
|||
"title": "ToolCallDelta",
|
||||
"description": "A tool call content delta for streaming responses."
|
||||
},
|
||||
"CompletionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model_id": {
|
||||
"type": "string",
|
||||
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"description": "The content to generate a completion for."
|
||||
},
|
||||
"sampling_params": {
|
||||
"$ref": "#/components/schemas/SamplingParams",
|
||||
"description": "(Optional) Parameters to control the sampling strategy."
|
||||
},
|
||||
"response_format": {
|
||||
"$ref": "#/components/schemas/ResponseFormat",
|
||||
"description": "(Optional) Grammar specification for guided (structured) decoding."
|
||||
},
|
||||
"stream": {
|
||||
"type": "boolean",
|
||||
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
|
||||
},
|
||||
"logprobs": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"top_k": {
|
||||
"type": "integer",
|
||||
"default": 0,
|
||||
"description": "How many tokens (for each position) to return log probabilities for."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "(Optional) If specified, log probabilities for each token position will be returned."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"model_id",
|
||||
"content"
|
||||
],
|
||||
"title": "CompletionRequest"
|
||||
},
|
||||
"CompletionResponseStreamChunk": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"metrics": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/MetricInResponse"
|
||||
},
|
||||
"description": "(Optional) List of metrics associated with the API response"
|
||||
},
|
||||
"delta": {
|
||||
"type": "string",
|
||||
"description": "New content generated since last chunk. This can be one or more tokens."
|
||||
},
|
||||
"stop_reason": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"end_of_turn",
|
||||
"end_of_message",
|
||||
"out_of_tokens"
|
||||
],
|
||||
"description": "Optional reason why generation stopped, if complete"
|
||||
},
|
||||
"logprobs": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/TokenLogProbs"
|
||||
},
|
||||
"description": "Optional log probabilities for generated tokens"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"delta"
|
||||
],
|
||||
"title": "CompletionResponseStreamChunk",
|
||||
"description": "A chunk of a streamed completion response."
|
||||
},
|
||||
"AgentConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -7848,6 +7482,14 @@
|
|||
"default": true,
|
||||
"description": "Whether this parameter is required for tool invocation"
|
||||
},
|
||||
"items": {
|
||||
"type": "object",
|
||||
"description": "Type of the elements when parameter_type is array"
|
||||
},
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "(Optional) Title of the parameter"
|
||||
},
|
||||
"default": {
|
||||
"oneOf": [
|
||||
{
|
||||
|
@ -18779,11 +18421,6 @@
|
|||
"description": "Main functionalities provided by this API:\n- Create agents with specific instructions and ability to use tools.\n- Interactions with agents are grouped into sessions (\"threads\"), and each interaction is called a \"turn\".\n- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).\n- Agents can be provided with various shields (see the Safety API for more details).\n- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.",
|
||||
"x-displayName": "Agents API for creating and interacting with agentic systems."
|
||||
},
|
||||
{
|
||||
"name": "BatchInference (Coming Soon)",
|
||||
"description": "This is an asynchronous API. If the request is successful, the response will be a job which can be polled for completion.\n\nNOTE: This API is not yet implemented and is subject to change in concert with other asynchronous APIs\nincluding (post-training, evals, etc).",
|
||||
"x-displayName": "Batch inference API for generating completions and chat completions."
|
||||
},
|
||||
{
|
||||
"name": "Benchmarks"
|
||||
},
|
||||
|
@ -18858,7 +18495,6 @@
|
|||
"name": "Operations",
|
||||
"tags": [
|
||||
"Agents",
|
||||
"BatchInference (Coming Soon)",
|
||||
"Benchmarks",
|
||||
"DatasetIO",
|
||||
"Datasets",
|
||||
|
|
497
docs/static/llama-stack-spec.yaml
vendored
497
docs/static/llama-stack-spec.yaml
vendored
|
@ -43,72 +43,6 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/AppendRowsRequest'
|
||||
required: true
|
||||
/v1/inference/batch-chat-completion:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
A BatchChatCompletionResponse with the full completions.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchChatCompletionResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate chat completions for a batch of messages using the specified model.
|
||||
description: >-
|
||||
Generate chat completions for a batch of messages using the specified model.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchChatCompletionRequest'
|
||||
required: true
|
||||
/v1/inference/batch-completion:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
A BatchCompletionResponse with the full completions.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchCompletionResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate completions for a batch of content using the specified model.
|
||||
description: >-
|
||||
Generate completions for a batch of content using the specified model.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchCompletionRequest'
|
||||
required: true
|
||||
/v1alpha/post-training/job/cancel:
|
||||
post:
|
||||
responses:
|
||||
|
@ -186,7 +120,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- BatchInference (Coming Soon)
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate a chat completion for the given messages using the specified model.
|
||||
description: >-
|
||||
|
@ -198,43 +132,6 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/ChatCompletionRequest'
|
||||
required: true
|
||||
/v1/inference/completion:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
If stream=False, returns a CompletionResponse with the full completion.
|
||||
If stream=True, returns an SSE event stream of CompletionResponseStreamChunk.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CompletionResponse'
|
||||
text/event-stream:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CompletionResponseStreamChunk'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- BatchInference (Coming Soon)
|
||||
summary: >-
|
||||
Generate a completion for the given content using the specified model.
|
||||
description: >-
|
||||
Generate a completion for the given content using the specified model.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CompletionRequest'
|
||||
required: true
|
||||
/v1/agents:
|
||||
get:
|
||||
responses:
|
||||
|
@ -4559,6 +4456,16 @@ components:
|
|||
required:
|
||||
- rows
|
||||
title: AppendRowsRequest
|
||||
CancelTrainingJobRequest:
|
||||
type: object
|
||||
properties:
|
||||
job_uuid:
|
||||
type: string
|
||||
description: The UUID of the job to cancel.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- job_uuid
|
||||
title: CancelTrainingJobRequest
|
||||
CompletionMessage:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4959,6 +4866,16 @@ components:
|
|||
required:
|
||||
type: boolean
|
||||
default: true
|
||||
items:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
title:
|
||||
type: string
|
||||
default:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
|
@ -5076,224 +4993,6 @@ components:
|
|||
title: UserMessage
|
||||
description: >-
|
||||
A message from the user in a chat conversation.
|
||||
BatchChatCompletionRequest:
|
||||
type: object
|
||||
properties:
|
||||
model_id:
|
||||
type: string
|
||||
description: >-
|
||||
The identifier of the model to use. The model must be registered with
|
||||
Llama Stack and available via the /models endpoint.
|
||||
messages_batch:
|
||||
type: array
|
||||
items:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/Message'
|
||||
description: >-
|
||||
The messages to generate completions for.
|
||||
sampling_params:
|
||||
$ref: '#/components/schemas/SamplingParams'
|
||||
description: >-
|
||||
(Optional) Parameters to control the sampling strategy.
|
||||
tools:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ToolDefinition'
|
||||
description: >-
|
||||
(Optional) List of tool definitions available to the model.
|
||||
tool_config:
|
||||
$ref: '#/components/schemas/ToolConfig'
|
||||
description: (Optional) Configuration for tool use.
|
||||
response_format:
|
||||
$ref: '#/components/schemas/ResponseFormat'
|
||||
description: >-
|
||||
(Optional) Grammar specification for guided (structured) decoding.
|
||||
logprobs:
|
||||
type: object
|
||||
properties:
|
||||
top_k:
|
||||
type: integer
|
||||
default: 0
|
||||
description: >-
|
||||
How many tokens (for each position) to return log probabilities for.
|
||||
additionalProperties: false
|
||||
description: >-
|
||||
(Optional) If specified, log probabilities for each token position will
|
||||
be returned.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model_id
|
||||
- messages_batch
|
||||
title: BatchChatCompletionRequest
|
||||
BatchChatCompletionResponse:
|
||||
type: object
|
||||
properties:
|
||||
batch:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ChatCompletionResponse'
|
||||
description: >-
|
||||
List of chat completion responses, one for each conversation in the batch
|
||||
additionalProperties: false
|
||||
required:
|
||||
- batch
|
||||
title: BatchChatCompletionResponse
|
||||
description: >-
|
||||
Response from a batch chat completion request.
|
||||
ChatCompletionResponse:
|
||||
type: object
|
||||
properties:
|
||||
metrics:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricInResponse'
|
||||
description: >-
|
||||
(Optional) List of metrics associated with the API response
|
||||
completion_message:
|
||||
$ref: '#/components/schemas/CompletionMessage'
|
||||
description: The complete response message
|
||||
logprobs:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/TokenLogProbs'
|
||||
description: >-
|
||||
Optional log probabilities for generated tokens
|
||||
additionalProperties: false
|
||||
required:
|
||||
- completion_message
|
||||
title: ChatCompletionResponse
|
||||
description: Response from a chat completion request.
|
||||
MetricInResponse:
|
||||
type: object
|
||||
properties:
|
||||
metric:
|
||||
type: string
|
||||
description: The name of the metric
|
||||
value:
|
||||
oneOf:
|
||||
- type: integer
|
||||
- type: number
|
||||
description: The numeric value of the metric
|
||||
unit:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) The unit of measurement for the metric value
|
||||
additionalProperties: false
|
||||
required:
|
||||
- metric
|
||||
- value
|
||||
title: MetricInResponse
|
||||
description: >-
|
||||
A metric value included in API responses.
|
||||
TokenLogProbs:
|
||||
type: object
|
||||
properties:
|
||||
logprobs_by_token:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: number
|
||||
description: >-
|
||||
Dictionary mapping tokens to their log probabilities
|
||||
additionalProperties: false
|
||||
required:
|
||||
- logprobs_by_token
|
||||
title: TokenLogProbs
|
||||
description: Log probabilities for generated tokens.
|
||||
BatchCompletionRequest:
|
||||
type: object
|
||||
properties:
|
||||
model_id:
|
||||
type: string
|
||||
description: >-
|
||||
The identifier of the model to use. The model must be registered with
|
||||
Llama Stack and available via the /models endpoint.
|
||||
content_batch:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: The content to generate completions for.
|
||||
sampling_params:
|
||||
$ref: '#/components/schemas/SamplingParams'
|
||||
description: >-
|
||||
(Optional) Parameters to control the sampling strategy.
|
||||
response_format:
|
||||
$ref: '#/components/schemas/ResponseFormat'
|
||||
description: >-
|
||||
(Optional) Grammar specification for guided (structured) decoding.
|
||||
logprobs:
|
||||
type: object
|
||||
properties:
|
||||
top_k:
|
||||
type: integer
|
||||
default: 0
|
||||
description: >-
|
||||
How many tokens (for each position) to return log probabilities for.
|
||||
additionalProperties: false
|
||||
description: >-
|
||||
(Optional) If specified, log probabilities for each token position will
|
||||
be returned.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model_id
|
||||
- content_batch
|
||||
title: BatchCompletionRequest
|
||||
BatchCompletionResponse:
|
||||
type: object
|
||||
properties:
|
||||
batch:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/CompletionResponse'
|
||||
description: >-
|
||||
List of completion responses, one for each input in the batch
|
||||
additionalProperties: false
|
||||
required:
|
||||
- batch
|
||||
title: BatchCompletionResponse
|
||||
description: >-
|
||||
Response from a batch completion request.
|
||||
CompletionResponse:
|
||||
type: object
|
||||
properties:
|
||||
metrics:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricInResponse'
|
||||
description: >-
|
||||
(Optional) List of metrics associated with the API response
|
||||
content:
|
||||
type: string
|
||||
description: The generated completion text
|
||||
stop_reason:
|
||||
type: string
|
||||
enum:
|
||||
- end_of_turn
|
||||
- end_of_message
|
||||
- out_of_tokens
|
||||
description: Reason why generation stopped
|
||||
logprobs:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/TokenLogProbs'
|
||||
description: >-
|
||||
Optional log probabilities for generated tokens
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content
|
||||
- stop_reason
|
||||
title: CompletionResponse
|
||||
description: Response from a completion request.
|
||||
CancelTrainingJobRequest:
|
||||
type: object
|
||||
properties:
|
||||
job_uuid:
|
||||
type: string
|
||||
description: The UUID of the job to cancel.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- job_uuid
|
||||
title: CancelTrainingJobRequest
|
||||
ChatCompletionRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -5372,6 +5071,65 @@ components:
|
|||
- model_id
|
||||
- messages
|
||||
title: ChatCompletionRequest
|
||||
ChatCompletionResponse:
|
||||
type: object
|
||||
properties:
|
||||
metrics:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricInResponse'
|
||||
description: >-
|
||||
(Optional) List of metrics associated with the API response
|
||||
completion_message:
|
||||
$ref: '#/components/schemas/CompletionMessage'
|
||||
description: The complete response message
|
||||
logprobs:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/TokenLogProbs'
|
||||
description: >-
|
||||
Optional log probabilities for generated tokens
|
||||
additionalProperties: false
|
||||
required:
|
||||
- completion_message
|
||||
title: ChatCompletionResponse
|
||||
description: Response from a chat completion request.
|
||||
MetricInResponse:
|
||||
type: object
|
||||
properties:
|
||||
metric:
|
||||
type: string
|
||||
description: The name of the metric
|
||||
value:
|
||||
oneOf:
|
||||
- type: integer
|
||||
- type: number
|
||||
description: The numeric value of the metric
|
||||
unit:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) The unit of measurement for the metric value
|
||||
additionalProperties: false
|
||||
required:
|
||||
- metric
|
||||
- value
|
||||
title: MetricInResponse
|
||||
description: >-
|
||||
A metric value included in API responses.
|
||||
TokenLogProbs:
|
||||
type: object
|
||||
properties:
|
||||
logprobs_by_token:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: number
|
||||
description: >-
|
||||
Dictionary mapping tokens to their log probabilities
|
||||
additionalProperties: false
|
||||
required:
|
||||
- logprobs_by_token
|
||||
title: TokenLogProbs
|
||||
description: Log probabilities for generated tokens.
|
||||
ChatCompletionResponseEvent:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -5507,81 +5265,6 @@ components:
|
|||
title: ToolCallDelta
|
||||
description: >-
|
||||
A tool call content delta for streaming responses.
|
||||
CompletionRequest:
|
||||
type: object
|
||||
properties:
|
||||
model_id:
|
||||
type: string
|
||||
description: >-
|
||||
The identifier of the model to use. The model must be registered with
|
||||
Llama Stack and available via the /models endpoint.
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: >-
|
||||
The content to generate a completion for.
|
||||
sampling_params:
|
||||
$ref: '#/components/schemas/SamplingParams'
|
||||
description: >-
|
||||
(Optional) Parameters to control the sampling strategy.
|
||||
response_format:
|
||||
$ref: '#/components/schemas/ResponseFormat'
|
||||
description: >-
|
||||
(Optional) Grammar specification for guided (structured) decoding.
|
||||
stream:
|
||||
type: boolean
|
||||
description: >-
|
||||
(Optional) If True, generate an SSE event stream of the response. Defaults
|
||||
to False.
|
||||
logprobs:
|
||||
type: object
|
||||
properties:
|
||||
top_k:
|
||||
type: integer
|
||||
default: 0
|
||||
description: >-
|
||||
How many tokens (for each position) to return log probabilities for.
|
||||
additionalProperties: false
|
||||
description: >-
|
||||
(Optional) If specified, log probabilities for each token position will
|
||||
be returned.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model_id
|
||||
- content
|
||||
title: CompletionRequest
|
||||
CompletionResponseStreamChunk:
|
||||
type: object
|
||||
properties:
|
||||
metrics:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricInResponse'
|
||||
description: >-
|
||||
(Optional) List of metrics associated with the API response
|
||||
delta:
|
||||
type: string
|
||||
description: >-
|
||||
New content generated since last chunk. This can be one or more tokens.
|
||||
stop_reason:
|
||||
type: string
|
||||
enum:
|
||||
- end_of_turn
|
||||
- end_of_message
|
||||
- out_of_tokens
|
||||
description: >-
|
||||
Optional reason why generation stopped, if complete
|
||||
logprobs:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/TokenLogProbs'
|
||||
description: >-
|
||||
Optional log probabilities for generated tokens
|
||||
additionalProperties: false
|
||||
required:
|
||||
- delta
|
||||
title: CompletionResponseStreamChunk
|
||||
description: >-
|
||||
A chunk of a streamed completion response.
|
||||
AgentConfig:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -5730,6 +5413,13 @@ components:
|
|||
default: true
|
||||
description: >-
|
||||
Whether this parameter is required for tool invocation
|
||||
items:
|
||||
type: object
|
||||
description: >-
|
||||
Type of the elements when parameter_type is array
|
||||
title:
|
||||
type: string
|
||||
description: (Optional) Title of the parameter
|
||||
default:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
|
@ -13983,18 +13673,6 @@ tags:
|
|||
the RAG Tool and Vector IO APIs for more details.
|
||||
x-displayName: >-
|
||||
Agents API for creating and interacting with agentic systems.
|
||||
- name: BatchInference (Coming Soon)
|
||||
description: >-
|
||||
This is an asynchronous API. If the request is successful, the response will
|
||||
be a job which can be polled for completion.
|
||||
|
||||
|
||||
NOTE: This API is not yet implemented and is subject to change in concert with
|
||||
other asynchronous APIs
|
||||
|
||||
including (post-training, evals, etc).
|
||||
x-displayName: >-
|
||||
Batch inference API for generating completions and chat completions.
|
||||
- name: Benchmarks
|
||||
- name: DatasetIO
|
||||
- name: Datasets
|
||||
|
@ -14037,7 +13715,6 @@ x-tagGroups:
|
|||
- name: Operations
|
||||
tags:
|
||||
- Agents
|
||||
- BatchInference (Coming Soon)
|
||||
- Benchmarks
|
||||
- DatasetIO
|
||||
- Datasets
|
||||
|
|
|
@ -1,79 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
from llama_stack.apis.common.job_types import Job
|
||||
from llama_stack.apis.inference import (
|
||||
InterleavedContent,
|
||||
LogProbConfig,
|
||||
Message,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
ToolChoice,
|
||||
ToolDefinition,
|
||||
ToolPromptFormat,
|
||||
)
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import webmethod
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class BatchInference(Protocol):
|
||||
"""Batch inference API for generating completions and chat completions.
|
||||
|
||||
This is an asynchronous API. If the request is successful, the response will be a job which can be polled for completion.
|
||||
|
||||
NOTE: This API is not yet implemented and is subject to change in concert with other asynchronous APIs
|
||||
including (post-training, evals, etc).
|
||||
"""
|
||||
|
||||
@webmethod(route="/batch-inference/completion", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def completion(
|
||||
self,
|
||||
model: str,
|
||||
content_batch: list[InterleavedContent],
|
||||
sampling_params: SamplingParams | None = None,
|
||||
response_format: ResponseFormat | None = None,
|
||||
logprobs: LogProbConfig | None = None,
|
||||
) -> Job:
|
||||
"""Generate completions for a batch of content.
|
||||
|
||||
:param model: The model to use for the completion.
|
||||
:param content_batch: The content to complete.
|
||||
:param sampling_params: The sampling parameters to use for the completion.
|
||||
:param response_format: The response format to use for the completion.
|
||||
:param logprobs: The logprobs to use for the completion.
|
||||
:returns: A job for the completion.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/batch-inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def chat_completion(
|
||||
self,
|
||||
model: str,
|
||||
messages_batch: list[list[Message]],
|
||||
sampling_params: SamplingParams | None = None,
|
||||
# zero-shot tool definitions as input to the model
|
||||
tools: list[ToolDefinition] | None = None,
|
||||
tool_choice: ToolChoice | None = ToolChoice.auto,
|
||||
tool_prompt_format: ToolPromptFormat | None = None,
|
||||
response_format: ResponseFormat | None = None,
|
||||
logprobs: LogProbConfig | None = None,
|
||||
) -> Job:
|
||||
"""Generate chat completions for a batch of messages.
|
||||
|
||||
:param model: The model to use for the chat completion.
|
||||
:param messages_batch: The messages to complete.
|
||||
:param sampling_params: The sampling parameters to use for the completion.
|
||||
:param tools: The tools to use for the chat completion.
|
||||
:param tool_choice: The tool choice to use for the chat completion.
|
||||
:param tool_prompt_format: The tool prompt format to use for the chat completion.
|
||||
:param response_format: The response format to use for the chat completion.
|
||||
:param logprobs: The logprobs to use for the chat completion.
|
||||
:returns: A job for the chat completion.
|
||||
"""
|
||||
...
|
|
@ -975,26 +975,6 @@ class EmbeddingTaskType(Enum):
|
|||
document = "document"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BatchCompletionResponse(BaseModel):
|
||||
"""Response from a batch completion request.
|
||||
|
||||
:param batch: List of completion responses, one for each input in the batch
|
||||
"""
|
||||
|
||||
batch: list[CompletionResponse]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BatchChatCompletionResponse(BaseModel):
|
||||
"""Response from a batch chat completion request.
|
||||
|
||||
:param batch: List of chat completion responses, one for each conversation in the batch
|
||||
"""
|
||||
|
||||
batch: list[ChatCompletionResponse]
|
||||
|
||||
|
||||
class OpenAICompletionWithInputMessages(OpenAIChatCompletion):
|
||||
input_messages: list[OpenAIMessageParam]
|
||||
|
||||
|
@ -1028,7 +1008,6 @@ class InferenceProvider(Protocol):
|
|||
|
||||
model_store: ModelStore | None = None
|
||||
|
||||
@webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def completion(
|
||||
self,
|
||||
model_id: str,
|
||||
|
@ -1051,27 +1030,6 @@ class InferenceProvider(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/inference/batch-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
|
||||
async def batch_completion(
|
||||
self,
|
||||
model_id: str,
|
||||
content_batch: list[InterleavedContent],
|
||||
sampling_params: SamplingParams | None = None,
|
||||
response_format: ResponseFormat | None = None,
|
||||
logprobs: LogProbConfig | None = None,
|
||||
) -> BatchCompletionResponse:
|
||||
"""Generate completions for a batch of content using the specified model.
|
||||
|
||||
:param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
|
||||
:param content_batch: The content to generate completions for.
|
||||
:param sampling_params: (Optional) Parameters to control the sampling strategy.
|
||||
:param response_format: (Optional) Grammar specification for guided (structured) decoding.
|
||||
:param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
|
||||
:returns: A BatchCompletionResponse with the full completions.
|
||||
"""
|
||||
raise NotImplementedError("Batch completion is not implemented")
|
||||
return # this is so mypy's safe-super rule will consider the method concrete
|
||||
|
||||
@webmethod(route="/inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def chat_completion(
|
||||
self,
|
||||
|
@ -1112,31 +1070,6 @@ class InferenceProvider(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
|
||||
async def batch_chat_completion(
|
||||
self,
|
||||
model_id: str,
|
||||
messages_batch: list[list[Message]],
|
||||
sampling_params: SamplingParams | None = None,
|
||||
tools: list[ToolDefinition] | None = None,
|
||||
tool_config: ToolConfig | None = None,
|
||||
response_format: ResponseFormat | None = None,
|
||||
logprobs: LogProbConfig | None = None,
|
||||
) -> BatchChatCompletionResponse:
|
||||
"""Generate chat completions for a batch of messages using the specified model.
|
||||
|
||||
:param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
|
||||
:param messages_batch: The messages to generate completions for.
|
||||
:param sampling_params: (Optional) Parameters to control the sampling strategy.
|
||||
:param tools: (Optional) List of tool definitions available to the model.
|
||||
:param tool_config: (Optional) Configuration for tool use.
|
||||
:param response_format: (Optional) Grammar specification for guided (structured) decoding.
|
||||
:param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
|
||||
:returns: A BatchChatCompletionResponse with the full completions.
|
||||
"""
|
||||
raise NotImplementedError("Batch chat completion is not implemented")
|
||||
return # this is so mypy's safe-super rule will consider the method concrete
|
||||
|
||||
@webmethod(route="/inference/embeddings", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def embeddings(
|
||||
self,
|
||||
|
|
|
@ -27,6 +27,8 @@ class ToolParameter(BaseModel):
|
|||
:param parameter_type: Type of the parameter (e.g., string, integer)
|
||||
:param description: Human-readable description of what the parameter does
|
||||
:param required: Whether this parameter is required for tool invocation
|
||||
:param items: Type of the elements when parameter_type is array
|
||||
:param title: (Optional) Title of the parameter
|
||||
:param default: (Optional) Default value for the parameter if not provided
|
||||
"""
|
||||
|
||||
|
@ -34,6 +36,8 @@ class ToolParameter(BaseModel):
|
|||
parameter_type: str
|
||||
description: str
|
||||
required: bool = Field(default=True)
|
||||
items: dict | None = None
|
||||
title: str | None = None
|
||||
default: Any | None = None
|
||||
|
||||
|
||||
|
|
|
@ -20,8 +20,6 @@ from llama_stack.apis.common.content_types import (
|
|||
)
|
||||
from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
|
||||
from llama_stack.apis.inference import (
|
||||
BatchChatCompletionResponse,
|
||||
BatchCompletionResponse,
|
||||
ChatCompletionResponse,
|
||||
ChatCompletionResponseEventType,
|
||||
ChatCompletionResponseStreamChunk,
|
||||
|
@ -273,30 +271,6 @@ class InferenceRouter(Inference):
|
|||
)
|
||||
return response
|
||||
|
||||
async def batch_chat_completion(
|
||||
self,
|
||||
model_id: str,
|
||||
messages_batch: list[list[Message]],
|
||||
tools: list[ToolDefinition] | None = None,
|
||||
tool_config: ToolConfig | None = None,
|
||||
sampling_params: SamplingParams | None = None,
|
||||
response_format: ResponseFormat | None = None,
|
||||
logprobs: LogProbConfig | None = None,
|
||||
) -> BatchChatCompletionResponse:
|
||||
logger.debug(
|
||||
f"InferenceRouter.batch_chat_completion: {model_id=}, {len(messages_batch)=}, {sampling_params=}, {response_format=}, {logprobs=}",
|
||||
)
|
||||
provider = await self.routing_table.get_provider_impl(model_id)
|
||||
return await provider.batch_chat_completion(
|
||||
model_id=model_id,
|
||||
messages_batch=messages_batch,
|
||||
tools=tools,
|
||||
tool_config=tool_config,
|
||||
sampling_params=sampling_params,
|
||||
response_format=response_format,
|
||||
logprobs=logprobs,
|
||||
)
|
||||
|
||||
async def completion(
|
||||
self,
|
||||
model_id: str,
|
||||
|
@ -338,20 +312,6 @@ class InferenceRouter(Inference):
|
|||
|
||||
return response
|
||||
|
||||
async def batch_completion(
|
||||
self,
|
||||
model_id: str,
|
||||
content_batch: list[InterleavedContent],
|
||||
sampling_params: SamplingParams | None = None,
|
||||
response_format: ResponseFormat | None = None,
|
||||
logprobs: LogProbConfig | None = None,
|
||||
) -> BatchCompletionResponse:
|
||||
logger.debug(
|
||||
f"InferenceRouter.batch_completion: {model_id=}, {len(content_batch)=}, {sampling_params=}, {response_format=}, {logprobs=}",
|
||||
)
|
||||
provider = await self.routing_table.get_provider_impl(model_id)
|
||||
return await provider.batch_completion(model_id, content_batch, sampling_params, response_format, logprobs)
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
|
|
|
@ -14,7 +14,6 @@ from typing import Any
|
|||
import yaml
|
||||
|
||||
from llama_stack.apis.agents import Agents
|
||||
from llama_stack.apis.batch_inference import BatchInference
|
||||
from llama_stack.apis.benchmarks import Benchmarks
|
||||
from llama_stack.apis.datasetio import DatasetIO
|
||||
from llama_stack.apis.datasets import Datasets
|
||||
|
@ -54,7 +53,6 @@ class LlamaStack(
|
|||
Providers,
|
||||
VectorDBs,
|
||||
Inference,
|
||||
BatchInference,
|
||||
Agents,
|
||||
Safety,
|
||||
SyntheticDataGeneration,
|
||||
|
|
|
@ -92,6 +92,8 @@ class ToolParamDefinition(BaseModel):
|
|||
param_type: str
|
||||
description: str | None = None
|
||||
required: bool | None = True
|
||||
items: Any | None = None
|
||||
title: str | None = None
|
||||
default: Any | None = None
|
||||
|
||||
|
||||
|
|
|
@ -798,6 +798,8 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
param_type=param.parameter_type,
|
||||
description=param.description,
|
||||
required=param.required,
|
||||
items=param.items,
|
||||
title=param.title,
|
||||
default=param.default,
|
||||
)
|
||||
for param in tool_def.parameters
|
||||
|
@ -841,6 +843,8 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
param_type=param.parameter_type,
|
||||
description=param.description,
|
||||
required=param.required,
|
||||
items=param.items,
|
||||
title=param.title,
|
||||
default=param.default,
|
||||
)
|
||||
for param in tool_def.parameters
|
||||
|
@ -920,7 +924,7 @@ async def get_raw_document_text(document: Document) -> str:
|
|||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
elif not (document.mime_type.startswith("text/") or document.mime_type == "application/yaml"):
|
||||
elif not (document.mime_type.startswith("text/") or document.mime_type in ("application/yaml", "application/json")):
|
||||
raise ValueError(f"Unexpected document mime type: {document.mime_type}")
|
||||
|
||||
if isinstance(document.content, URL):
|
||||
|
|
|
@ -568,6 +568,7 @@ class StreamingResponseOrchestrator:
|
|||
description=param.description,
|
||||
required=param.required,
|
||||
default=param.default,
|
||||
items=param.items,
|
||||
)
|
||||
for param in t.parameters
|
||||
},
|
||||
|
|
|
@ -18,8 +18,6 @@ from llama_stack.apis.common.content_types import (
|
|||
ToolCallParseStatus,
|
||||
)
|
||||
from llama_stack.apis.inference import (
|
||||
BatchChatCompletionResponse,
|
||||
BatchCompletionResponse,
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
ChatCompletionResponseEvent,
|
||||
|
@ -219,41 +217,6 @@ class MetaReferenceInferenceImpl(
|
|||
results = await self._nonstream_completion([request])
|
||||
return results[0]
|
||||
|
||||
async def batch_completion(
|
||||
self,
|
||||
model_id: str,
|
||||
content_batch: list[InterleavedContent],
|
||||
sampling_params: SamplingParams | None = None,
|
||||
response_format: ResponseFormat | None = None,
|
||||
stream: bool | None = False,
|
||||
logprobs: LogProbConfig | None = None,
|
||||
) -> BatchCompletionResponse:
|
||||
if sampling_params is None:
|
||||
sampling_params = SamplingParams()
|
||||
if logprobs:
|
||||
assert logprobs.top_k == 1, f"Unexpected top_k={logprobs.top_k}"
|
||||
|
||||
content_batch = [
|
||||
augment_content_with_response_format_prompt(response_format, content) for content in content_batch
|
||||
]
|
||||
|
||||
request_batch = []
|
||||
for content in content_batch:
|
||||
request = CompletionRequest(
|
||||
model=model_id,
|
||||
content=content,
|
||||
sampling_params=sampling_params,
|
||||
response_format=response_format,
|
||||
stream=stream,
|
||||
logprobs=logprobs,
|
||||
)
|
||||
self.check_model(request)
|
||||
request = await convert_request_to_raw(request)
|
||||
request_batch.append(request)
|
||||
|
||||
results = await self._nonstream_completion(request_batch)
|
||||
return BatchCompletionResponse(batch=results)
|
||||
|
||||
async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator:
|
||||
tokenizer = self.generator.formatter.tokenizer
|
||||
|
||||
|
@ -399,49 +362,6 @@ class MetaReferenceInferenceImpl(
|
|||
results = await self._nonstream_chat_completion([request])
|
||||
return results[0]
|
||||
|
||||
async def batch_chat_completion(
|
||||
self,
|
||||
model_id: str,
|
||||
messages_batch: list[list[Message]],
|
||||
sampling_params: SamplingParams | None = None,
|
||||
response_format: ResponseFormat | None = None,
|
||||
tools: list[ToolDefinition] | None = None,
|
||||
stream: bool | None = False,
|
||||
logprobs: LogProbConfig | None = None,
|
||||
tool_config: ToolConfig | None = None,
|
||||
) -> BatchChatCompletionResponse:
|
||||
if sampling_params is None:
|
||||
sampling_params = SamplingParams()
|
||||
if logprobs:
|
||||
assert logprobs.top_k == 1, f"Unexpected top_k={logprobs.top_k}"
|
||||
|
||||
# wrapper request to make it easier to pass around (internal only, not exposed to API)
|
||||
request_batch = []
|
||||
for messages in messages_batch:
|
||||
request = ChatCompletionRequest(
|
||||
model=model_id,
|
||||
messages=messages,
|
||||
sampling_params=sampling_params,
|
||||
tools=tools or [],
|
||||
response_format=response_format,
|
||||
logprobs=logprobs,
|
||||
tool_config=tool_config or ToolConfig(),
|
||||
)
|
||||
self.check_model(request)
|
||||
|
||||
# augment and rewrite messages depending on the model
|
||||
request.messages = chat_completion_request_to_messages(request, self.llama_model.core_model_id.value)
|
||||
# download media and convert to raw content so we can send it to the model
|
||||
request = await convert_request_to_raw(request)
|
||||
request_batch.append(request)
|
||||
|
||||
if self.config.create_distributed_process_group:
|
||||
if SEMAPHORE.locked():
|
||||
raise RuntimeError("Only one concurrent request is supported")
|
||||
|
||||
results = await self._nonstream_chat_completion(request_batch)
|
||||
return BatchChatCompletionResponse(batch=results)
|
||||
|
||||
async def _nonstream_chat_completion(
|
||||
self, request_batch: list[ChatCompletionRequest]
|
||||
) -> list[ChatCompletionResponse]:
|
||||
|
|
|
@ -61,6 +61,7 @@ logger = get_logger(name=__name__, category="inference::fireworks")
|
|||
class FireworksInferenceAdapter(OpenAIMixin, ModelRegistryHelper, Inference, NeedsRequestProviderData):
|
||||
embedding_model_metadata = {
|
||||
"nomic-ai/nomic-embed-text-v1.5": {"embedding_dimension": 768, "context_length": 8192},
|
||||
"accounts/fireworks/models/qwen3-embedding-8b": {"embedding_dimension": 4096, "context_length": 40960},
|
||||
}
|
||||
|
||||
def __init__(self, config: FireworksImplConfig) -> None:
|
||||
|
|
|
@ -6,8 +6,7 @@
|
|||
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
from collections.abc import AsyncGenerator, AsyncIterator
|
||||
from collections.abc import AsyncGenerator
|
||||
from typing import Any
|
||||
|
||||
from ollama import AsyncClient as AsyncOllamaClient
|
||||
|
@ -33,10 +32,6 @@ from llama_stack.apis.inference import (
|
|||
JsonSchemaResponseFormat,
|
||||
LogProbConfig,
|
||||
Message,
|
||||
OpenAIChatCompletion,
|
||||
OpenAIChatCompletionChunk,
|
||||
OpenAIMessageParam,
|
||||
OpenAIResponseFormatParam,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
TextTruncation,
|
||||
|
@ -62,7 +57,6 @@ from llama_stack.providers.utils.inference.openai_compat import (
|
|||
OpenAICompatCompletionChoice,
|
||||
OpenAICompatCompletionResponse,
|
||||
get_sampling_options,
|
||||
prepare_openai_completion_params,
|
||||
process_chat_completion_response,
|
||||
process_chat_completion_stream_response,
|
||||
process_completion_response,
|
||||
|
@ -75,7 +69,6 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
|
|||
content_has_media,
|
||||
convert_image_content_to_url,
|
||||
interleaved_content_as_str,
|
||||
localize_image_content,
|
||||
request_has_media,
|
||||
)
|
||||
|
||||
|
@ -84,6 +77,7 @@ logger = get_logger(name=__name__, category="inference::ollama")
|
|||
|
||||
class OllamaInferenceAdapter(
|
||||
OpenAIMixin,
|
||||
ModelRegistryHelper,
|
||||
InferenceProvider,
|
||||
ModelsProtocolPrivate,
|
||||
):
|
||||
|
@ -129,6 +123,8 @@ class OllamaInferenceAdapter(
|
|||
],
|
||||
)
|
||||
self.config = config
|
||||
# Ollama does not support image urls, so we need to download the image and convert it to base64
|
||||
self.download_images = True
|
||||
self._clients: dict[asyncio.AbstractEventLoop, AsyncOllamaClient] = {}
|
||||
|
||||
@property
|
||||
|
@ -173,9 +169,6 @@ class OllamaInferenceAdapter(
|
|||
async def shutdown(self) -> None:
|
||||
self._clients.clear()
|
||||
|
||||
async def unregister_model(self, model_id: str) -> None:
|
||||
pass
|
||||
|
||||
async def _get_model(self, model_id: str) -> Model:
|
||||
if not self.model_store:
|
||||
raise ValueError("Model store not set")
|
||||
|
@ -403,75 +396,6 @@ class OllamaInferenceAdapter(
|
|||
|
||||
raise UnsupportedModelError(model.provider_model_id, list(self._model_cache.keys()))
|
||||
|
||||
async def openai_chat_completion(
|
||||
self,
|
||||
model: str,
|
||||
messages: list[OpenAIMessageParam],
|
||||
frequency_penalty: float | None = None,
|
||||
function_call: str | dict[str, Any] | None = None,
|
||||
functions: list[dict[str, Any]] | None = None,
|
||||
logit_bias: dict[str, float] | None = None,
|
||||
logprobs: bool | None = None,
|
||||
max_completion_tokens: int | None = None,
|
||||
max_tokens: int | None = None,
|
||||
n: int | None = None,
|
||||
parallel_tool_calls: bool | None = None,
|
||||
presence_penalty: float | None = None,
|
||||
response_format: OpenAIResponseFormatParam | None = None,
|
||||
seed: int | None = None,
|
||||
stop: str | list[str] | None = None,
|
||||
stream: bool | None = None,
|
||||
stream_options: dict[str, Any] | None = None,
|
||||
temperature: float | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
top_logprobs: int | None = None,
|
||||
top_p: float | None = None,
|
||||
user: str | None = None,
|
||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||
model_obj = await self._get_model(model)
|
||||
|
||||
# Ollama does not support image urls, so we need to download the image and convert it to base64
|
||||
async def _convert_message(m: OpenAIMessageParam) -> OpenAIMessageParam:
|
||||
if isinstance(m.content, list):
|
||||
for c in m.content:
|
||||
if c.type == "image_url" and c.image_url and c.image_url.url:
|
||||
localize_result = await localize_image_content(c.image_url.url)
|
||||
if localize_result is None:
|
||||
raise ValueError(f"Failed to localize image content from {c.image_url.url}")
|
||||
|
||||
content, format = localize_result
|
||||
c.image_url.url = f"data:image/{format};base64,{base64.b64encode(content).decode('utf-8')}"
|
||||
return m
|
||||
|
||||
messages = [await _convert_message(m) for m in messages]
|
||||
params = await prepare_openai_completion_params(
|
||||
model=model_obj.provider_resource_id,
|
||||
messages=messages,
|
||||
frequency_penalty=frequency_penalty,
|
||||
function_call=function_call,
|
||||
functions=functions,
|
||||
logit_bias=logit_bias,
|
||||
logprobs=logprobs,
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
max_tokens=max_tokens,
|
||||
n=n,
|
||||
parallel_tool_calls=parallel_tool_calls,
|
||||
presence_penalty=presence_penalty,
|
||||
response_format=response_format,
|
||||
seed=seed,
|
||||
stop=stop,
|
||||
stream=stream,
|
||||
stream_options=stream_options,
|
||||
temperature=temperature,
|
||||
tool_choice=tool_choice,
|
||||
tools=tools,
|
||||
top_logprobs=top_logprobs,
|
||||
top_p=top_p,
|
||||
user=user,
|
||||
)
|
||||
return await OpenAIMixin.openai_chat_completion(self, **params)
|
||||
|
||||
|
||||
async def convert_message_to_openai_dict_for_ollama(message: Message) -> list[dict]:
|
||||
async def _convert_content(content) -> dict:
|
||||
|
|
|
@ -21,8 +21,6 @@ logger = get_logger(name=__name__, category="inference::openai")
|
|||
# | completion | LiteLLMOpenAIMixin |
|
||||
# | chat_completion | LiteLLMOpenAIMixin |
|
||||
# | embedding | LiteLLMOpenAIMixin |
|
||||
# | batch_completion | LiteLLMOpenAIMixin |
|
||||
# | batch_chat_completion | LiteLLMOpenAIMixin |
|
||||
# | openai_completion | OpenAIMixin |
|
||||
# | openai_chat_completion | OpenAIMixin |
|
||||
# | openai_embeddings | OpenAIMixin |
|
||||
|
|
|
@ -805,6 +805,10 @@ def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict:
|
|||
properties[param_name].update(description=param.description)
|
||||
if param.default:
|
||||
properties[param_name].update(default=param.default)
|
||||
if param.items:
|
||||
properties[param_name].update(items=param.items)
|
||||
if param.title:
|
||||
properties[param_name].update(title=param.title)
|
||||
if param.required:
|
||||
required.append(param_name)
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import base64
|
||||
import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import AsyncIterator
|
||||
|
@ -26,6 +27,7 @@ from llama_stack.apis.models import ModelType
|
|||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
|
||||
from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content
|
||||
|
||||
logger = get_logger(name=__name__, category="providers::utils")
|
||||
|
||||
|
@ -51,6 +53,10 @@ class OpenAIMixin(ModelRegistryHelper, ABC):
|
|||
# This is useful for providers that do not return a unique id in the response.
|
||||
overwrite_completion_id: bool = False
|
||||
|
||||
# Allow subclasses to control whether to download images and convert to base64
|
||||
# for providers that require base64 encoded images instead of URLs.
|
||||
download_images: bool = False
|
||||
|
||||
# Embedding model metadata for this provider
|
||||
# Can be set by subclasses or instances to provide embedding models
|
||||
# Format: {"model_id": {"embedding_dimension": 1536, "context_length": 8192}}
|
||||
|
@ -239,6 +245,24 @@ class OpenAIMixin(ModelRegistryHelper, ABC):
|
|||
"""
|
||||
Direct OpenAI chat completion API call.
|
||||
"""
|
||||
if self.download_images:
|
||||
|
||||
async def _localize_image_url(m: OpenAIMessageParam) -> OpenAIMessageParam:
|
||||
if isinstance(m.content, list):
|
||||
for c in m.content:
|
||||
if c.type == "image_url" and c.image_url and c.image_url.url and "http" in c.image_url.url:
|
||||
localize_result = await localize_image_content(c.image_url.url)
|
||||
if localize_result is None:
|
||||
raise ValueError(
|
||||
f"Failed to localize image content from {c.image_url.url[:42]}{'...' if len(c.image_url.url) > 42 else ''}"
|
||||
)
|
||||
content, format = localize_result
|
||||
c.image_url.url = f"data:image/{format};base64,{base64.b64encode(content).decode('utf-8')}"
|
||||
# else it's a string and we don't need to modify it
|
||||
return m
|
||||
|
||||
messages = [await _localize_image_url(m) for m in messages]
|
||||
|
||||
resp = await self.client.chat.completions.create(
|
||||
**await prepare_openai_completion_params(
|
||||
model=await self._get_provider_model_id(model),
|
||||
|
|
|
@ -192,6 +192,14 @@ async def localize_image_content(uri: str) -> tuple[bytes, str] | None:
|
|||
format = "png"
|
||||
|
||||
return content, format
|
||||
elif uri.startswith("data"):
|
||||
# data:image/{format};base64,{data}
|
||||
match = re.match(r"data:image/(\w+);base64,(.+)", uri)
|
||||
if not match:
|
||||
raise ValueError(f"Invalid data URL format, {uri[:40]}...")
|
||||
fmt, image_data = match.groups()
|
||||
content = base64.b64decode(image_data)
|
||||
return content, fmt
|
||||
else:
|
||||
return None
|
||||
|
||||
|
|
|
@ -120,6 +120,10 @@ async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefs
|
|||
name=param_name,
|
||||
parameter_type=param_schema.get("type", "string"),
|
||||
description=param_schema.get("description", ""),
|
||||
required="default" not in param_schema,
|
||||
items=param_schema.get("items", None),
|
||||
title=param_schema.get("title", None),
|
||||
default=param_schema.get("default", None),
|
||||
)
|
||||
)
|
||||
tools.append(
|
||||
|
|
127
llama_stack/ui/package-lock.json
generated
127
llama_stack/ui/package-lock.json
generated
|
@ -28,7 +28,7 @@
|
|||
"react-markdown": "^10.1.0",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"remeda": "^2.32.0",
|
||||
"shiki": "^1.29.2",
|
||||
"shiki": "^3.13.0",
|
||||
"sonner": "^2.0.7",
|
||||
"tailwind-merge": "^3.3.1"
|
||||
},
|
||||
|
@ -51,7 +51,7 @@
|
|||
"prettier": "3.6.2",
|
||||
"tailwindcss": "^4",
|
||||
"ts-node": "^10.9.2",
|
||||
"tw-animate-css": "^1.2.9",
|
||||
"tw-animate-css": "^1.4.0",
|
||||
"typescript": "^5"
|
||||
}
|
||||
},
|
||||
|
@ -3250,65 +3250,63 @@
|
|||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@shikijs/core": {
|
||||
"version": "1.29.2",
|
||||
"resolved": "https://registry.npmjs.org/@shikijs/core/-/core-1.29.2.tgz",
|
||||
"integrity": "sha512-vju0lY9r27jJfOY4Z7+Rt/nIOjzJpZ3y+nYpqtUZInVoXQ/TJZcfGnNOGnKjFdVZb8qexiCuSlZRKcGfhhTTZQ==",
|
||||
"version": "3.13.0",
|
||||
"resolved": "https://registry.npmjs.org/@shikijs/core/-/core-3.13.0.tgz",
|
||||
"integrity": "sha512-3P8rGsg2Eh2qIHekwuQjzWhKI4jV97PhvYjYUzGqjvJfqdQPz+nMlfWahU24GZAyW1FxFI1sYjyhfh5CoLmIUA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@shikijs/engine-javascript": "1.29.2",
|
||||
"@shikijs/engine-oniguruma": "1.29.2",
|
||||
"@shikijs/types": "1.29.2",
|
||||
"@shikijs/vscode-textmate": "^10.0.1",
|
||||
"@shikijs/types": "3.13.0",
|
||||
"@shikijs/vscode-textmate": "^10.0.2",
|
||||
"@types/hast": "^3.0.4",
|
||||
"hast-util-to-html": "^9.0.4"
|
||||
"hast-util-to-html": "^9.0.5"
|
||||
}
|
||||
},
|
||||
"node_modules/@shikijs/engine-javascript": {
|
||||
"version": "1.29.2",
|
||||
"resolved": "https://registry.npmjs.org/@shikijs/engine-javascript/-/engine-javascript-1.29.2.tgz",
|
||||
"integrity": "sha512-iNEZv4IrLYPv64Q6k7EPpOCE/nuvGiKl7zxdq0WFuRPF5PAE9PRo2JGq/d8crLusM59BRemJ4eOqrFrC4wiQ+A==",
|
||||
"version": "3.13.0",
|
||||
"resolved": "https://registry.npmjs.org/@shikijs/engine-javascript/-/engine-javascript-3.13.0.tgz",
|
||||
"integrity": "sha512-Ty7xv32XCp8u0eQt8rItpMs6rU9Ki6LJ1dQOW3V/56PKDcpvfHPnYFbsx5FFUP2Yim34m/UkazidamMNVR4vKg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@shikijs/types": "1.29.2",
|
||||
"@shikijs/vscode-textmate": "^10.0.1",
|
||||
"oniguruma-to-es": "^2.2.0"
|
||||
"@shikijs/types": "3.13.0",
|
||||
"@shikijs/vscode-textmate": "^10.0.2",
|
||||
"oniguruma-to-es": "^4.3.3"
|
||||
}
|
||||
},
|
||||
"node_modules/@shikijs/engine-oniguruma": {
|
||||
"version": "1.29.2",
|
||||
"resolved": "https://registry.npmjs.org/@shikijs/engine-oniguruma/-/engine-oniguruma-1.29.2.tgz",
|
||||
"integrity": "sha512-7iiOx3SG8+g1MnlzZVDYiaeHe7Ez2Kf2HrJzdmGwkRisT7r4rak0e655AcM/tF9JG/kg5fMNYlLLKglbN7gBqA==",
|
||||
"version": "3.13.0",
|
||||
"resolved": "https://registry.npmjs.org/@shikijs/engine-oniguruma/-/engine-oniguruma-3.13.0.tgz",
|
||||
"integrity": "sha512-O42rBGr4UDSlhT2ZFMxqM7QzIU+IcpoTMzb3W7AlziI1ZF7R8eS2M0yt5Ry35nnnTX/LTLXFPUjRFCIW+Operg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@shikijs/types": "1.29.2",
|
||||
"@shikijs/vscode-textmate": "^10.0.1"
|
||||
"@shikijs/types": "3.13.0",
|
||||
"@shikijs/vscode-textmate": "^10.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@shikijs/langs": {
|
||||
"version": "1.29.2",
|
||||
"resolved": "https://registry.npmjs.org/@shikijs/langs/-/langs-1.29.2.tgz",
|
||||
"integrity": "sha512-FIBA7N3LZ+223U7cJDUYd5shmciFQlYkFXlkKVaHsCPgfVLiO+e12FmQE6Tf9vuyEsFe3dIl8qGWKXgEHL9wmQ==",
|
||||
"version": "3.13.0",
|
||||
"resolved": "https://registry.npmjs.org/@shikijs/langs/-/langs-3.13.0.tgz",
|
||||
"integrity": "sha512-672c3WAETDYHwrRP0yLy3W1QYB89Hbpj+pO4KhxK6FzIrDI2FoEXNiNCut6BQmEApYLfuYfpgOZaqbY+E9b8wQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@shikijs/types": "1.29.2"
|
||||
"@shikijs/types": "3.13.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@shikijs/themes": {
|
||||
"version": "1.29.2",
|
||||
"resolved": "https://registry.npmjs.org/@shikijs/themes/-/themes-1.29.2.tgz",
|
||||
"integrity": "sha512-i9TNZlsq4uoyqSbluIcZkmPL9Bfi3djVxRnofUHwvx/h6SRW3cwgBC5SML7vsDcWyukY0eCzVN980rqP6qNl9g==",
|
||||
"version": "3.13.0",
|
||||
"resolved": "https://registry.npmjs.org/@shikijs/themes/-/themes-3.13.0.tgz",
|
||||
"integrity": "sha512-Vxw1Nm1/Od8jyA7QuAenaV78BG2nSr3/gCGdBkLpfLscddCkzkL36Q5b67SrLLfvAJTOUzW39x4FHVCFriPVgg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@shikijs/types": "1.29.2"
|
||||
"@shikijs/types": "3.13.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@shikijs/types": {
|
||||
"version": "1.29.2",
|
||||
"resolved": "https://registry.npmjs.org/@shikijs/types/-/types-1.29.2.tgz",
|
||||
"integrity": "sha512-VJjK0eIijTZf0QSTODEXCqinjBn0joAHQ+aPSBzrv4O2d/QSbsMw+ZeSRx03kV34Hy7NzUvV/7NqfYGRLrASmw==",
|
||||
"version": "3.13.0",
|
||||
"resolved": "https://registry.npmjs.org/@shikijs/types/-/types-3.13.0.tgz",
|
||||
"integrity": "sha512-oM9P+NCFri/mmQ8LoFGVfVyemm5Hi27330zuOBp0annwJdKH1kOLndw3zCtAVDehPLg9fKqoEx3Ht/wNZxolfw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@shikijs/vscode-textmate": "^10.0.1",
|
||||
"@shikijs/vscode-textmate": "^10.0.2",
|
||||
"@types/hast": "^3.0.4"
|
||||
}
|
||||
},
|
||||
|
@ -6084,12 +6082,6 @@
|
|||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/emoji-regex-xs": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/emoji-regex-xs/-/emoji-regex-xs-1.0.0.tgz",
|
||||
"integrity": "sha512-LRlerrMYoIDrT6jgpeZ2YYl/L8EulRTt5hQcYjy5AInh7HWXKimpqx68aknBFpGL2+/IcogTcaydJEgaTmOpDg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/encodeurl": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz",
|
||||
|
@ -11813,15 +11805,21 @@
|
|||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/oniguruma-parser": {
|
||||
"version": "0.12.1",
|
||||
"resolved": "https://registry.npmjs.org/oniguruma-parser/-/oniguruma-parser-0.12.1.tgz",
|
||||
"integrity": "sha512-8Unqkvk1RYc6yq2WBYRj4hdnsAxVze8i7iPfQr8e4uSP3tRv0rpZcbGUDvxfQQcdwHt/e9PrMvGCsa8OqG9X3w==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/oniguruma-to-es": {
|
||||
"version": "2.3.0",
|
||||
"resolved": "https://registry.npmjs.org/oniguruma-to-es/-/oniguruma-to-es-2.3.0.tgz",
|
||||
"integrity": "sha512-bwALDxriqfKGfUufKGGepCzu9x7nJQuoRoAFp4AnwehhC2crqrDIAP/uN2qdlsAvSMpeRC3+Yzhqc7hLmle5+g==",
|
||||
"version": "4.3.3",
|
||||
"resolved": "https://registry.npmjs.org/oniguruma-to-es/-/oniguruma-to-es-4.3.3.tgz",
|
||||
"integrity": "sha512-rPiZhzC3wXwE59YQMRDodUwwT9FZ9nNBwQQfsd1wfdtlKEyCdRV0avrTcSZ5xlIvGRVPd/cx6ZN45ECmS39xvg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"emoji-regex-xs": "^1.0.0",
|
||||
"regex": "^5.1.1",
|
||||
"regex-recursion": "^5.1.1"
|
||||
"oniguruma-parser": "^0.12.1",
|
||||
"regex": "^6.0.1",
|
||||
"regex-recursion": "^6.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/openid-client": {
|
||||
|
@ -12613,21 +12611,20 @@
|
|||
}
|
||||
},
|
||||
"node_modules/regex": {
|
||||
"version": "5.1.1",
|
||||
"resolved": "https://registry.npmjs.org/regex/-/regex-5.1.1.tgz",
|
||||
"integrity": "sha512-dN5I359AVGPnwzJm2jN1k0W9LPZ+ePvoOeVMMfqIMFz53sSwXkxaJoxr50ptnsC771lK95BnTrVSZxq0b9yCGw==",
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/regex/-/regex-6.0.1.tgz",
|
||||
"integrity": "sha512-uorlqlzAKjKQZ5P+kTJr3eeJGSVroLKoHmquUj4zHWuR+hEyNqlXsSKlYYF5F4NI6nl7tWCs0apKJ0lmfsXAPA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"regex-utilities": "^2.3.0"
|
||||
}
|
||||
},
|
||||
"node_modules/regex-recursion": {
|
||||
"version": "5.1.1",
|
||||
"resolved": "https://registry.npmjs.org/regex-recursion/-/regex-recursion-5.1.1.tgz",
|
||||
"integrity": "sha512-ae7SBCbzVNrIjgSbh7wMznPcQel1DNlDtzensnFxpiNpXt1U2ju/bHugH422r+4LAVS1FpW1YCwilmnNsjum9w==",
|
||||
"version": "6.0.2",
|
||||
"resolved": "https://registry.npmjs.org/regex-recursion/-/regex-recursion-6.0.2.tgz",
|
||||
"integrity": "sha512-0YCaSCq2VRIebiaUviZNs0cBz1kg5kVS2UKUfNIx8YVs1cN3AV7NTctO5FOKBA+UT2BPJIWZauYHPqJODG50cg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"regex": "^5.1.1",
|
||||
"regex-utilities": "^2.3.0"
|
||||
}
|
||||
},
|
||||
|
@ -13165,18 +13162,18 @@
|
|||
}
|
||||
},
|
||||
"node_modules/shiki": {
|
||||
"version": "1.29.2",
|
||||
"resolved": "https://registry.npmjs.org/shiki/-/shiki-1.29.2.tgz",
|
||||
"integrity": "sha512-njXuliz/cP+67jU2hukkxCNuH1yUi4QfdZZY+sMr5PPrIyXSu5iTb/qYC4BiWWB0vZ+7TbdvYUCeL23zpwCfbg==",
|
||||
"version": "3.13.0",
|
||||
"resolved": "https://registry.npmjs.org/shiki/-/shiki-3.13.0.tgz",
|
||||
"integrity": "sha512-aZW4l8Og16CokuCLf8CF8kq+KK2yOygapU5m3+hoGw0Mdosc6fPitjM+ujYarppj5ZIKGyPDPP1vqmQhr+5/0g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@shikijs/core": "1.29.2",
|
||||
"@shikijs/engine-javascript": "1.29.2",
|
||||
"@shikijs/engine-oniguruma": "1.29.2",
|
||||
"@shikijs/langs": "1.29.2",
|
||||
"@shikijs/themes": "1.29.2",
|
||||
"@shikijs/types": "1.29.2",
|
||||
"@shikijs/vscode-textmate": "^10.0.1",
|
||||
"@shikijs/core": "3.13.0",
|
||||
"@shikijs/engine-javascript": "3.13.0",
|
||||
"@shikijs/engine-oniguruma": "3.13.0",
|
||||
"@shikijs/langs": "3.13.0",
|
||||
"@shikijs/themes": "3.13.0",
|
||||
"@shikijs/types": "3.13.0",
|
||||
"@shikijs/vscode-textmate": "^10.0.2",
|
||||
"@types/hast": "^3.0.4"
|
||||
}
|
||||
},
|
||||
|
@ -13970,9 +13967,9 @@
|
|||
"license": "0BSD"
|
||||
},
|
||||
"node_modules/tw-animate-css": {
|
||||
"version": "1.2.9",
|
||||
"resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.2.9.tgz",
|
||||
"integrity": "sha512-9O4k1at9pMQff9EAcCEuy1UNO43JmaPQvq+0lwza9Y0BQ6LB38NiMj+qHqjoQf40355MX+gs6wtlR6H9WsSXFg==",
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.4.0.tgz",
|
||||
"integrity": "sha512-7bziOlRqH0hJx80h/3mbicLW7o8qLsH5+RaLR2t+OHM3D0JlWGODQKQ4cxbK7WlvmUxpcj6Kgu6EKqjrGFe3QQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
"react-markdown": "^10.1.0",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"remeda": "^2.32.0",
|
||||
"shiki": "^1.29.2",
|
||||
"shiki": "^3.13.0",
|
||||
"sonner": "^2.0.7",
|
||||
"tailwind-merge": "^3.3.1"
|
||||
},
|
||||
|
@ -56,7 +56,7 @@
|
|||
"prettier": "3.6.2",
|
||||
"tailwindcss": "^4",
|
||||
"ts-node": "^10.9.2",
|
||||
"tw-animate-css": "^1.2.9",
|
||||
"tw-animate-css": "^1.4.0",
|
||||
"typescript": "^5"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -167,6 +167,8 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
|
|||
from starlette.responses import Response
|
||||
from starlette.routing import Mount, Route
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
server = FastMCP("FastMCP Test Server", log_level="WARNING")
|
||||
|
||||
tools = tools or default_tools()
|
||||
|
@ -211,6 +213,7 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
|
|||
return sock.getsockname()[1]
|
||||
|
||||
port = get_open_port()
|
||||
logger = get_logger(__name__, category="tests::mcp")
|
||||
|
||||
# make uvicorn logs be less verbose
|
||||
config = uvicorn.Config(app, host="0.0.0.0", port=port, log_level="warning")
|
||||
|
@ -218,10 +221,17 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
|
|||
app.state.uvicorn_server = server_instance
|
||||
|
||||
def run_server():
|
||||
server_instance.run()
|
||||
try:
|
||||
logger.info(f"Starting MCP server on port {port}")
|
||||
server_instance.run()
|
||||
logger.info(f"MCP server on port {port} has stopped")
|
||||
except Exception as e:
|
||||
logger.error(f"MCP server failed to start on port {port}: {e}")
|
||||
raise
|
||||
|
||||
# Start the server in a new thread
|
||||
server_thread = threading.Thread(target=run_server, daemon=True)
|
||||
logger.info(f"Starting MCP server thread on port {port}")
|
||||
server_thread.start()
|
||||
|
||||
# Polling until the server is ready
|
||||
|
@ -229,24 +239,36 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
|
|||
start_time = time.time()
|
||||
|
||||
server_url = f"http://localhost:{port}/sse"
|
||||
logger.info(f"Waiting for MCP server to be ready at {server_url}")
|
||||
|
||||
while time.time() - start_time < timeout:
|
||||
try:
|
||||
response = httpx.get(server_url)
|
||||
if response.status_code in [200, 401]:
|
||||
logger.info(f"MCP server is ready on port {port} (status: {response.status_code})")
|
||||
break
|
||||
except httpx.RequestError:
|
||||
except httpx.RequestError as e:
|
||||
logger.debug(f"Server not ready yet, retrying... ({e})")
|
||||
pass
|
||||
time.sleep(0.1)
|
||||
else:
|
||||
# If we exit the loop due to timeout
|
||||
logger.error(f"MCP server failed to start within {timeout} seconds on port {port}")
|
||||
logger.error(f"Thread alive: {server_thread.is_alive()}")
|
||||
if server_thread.is_alive():
|
||||
logger.error("Server thread is still running but not responding to HTTP requests")
|
||||
|
||||
try:
|
||||
yield {"server_url": server_url}
|
||||
finally:
|
||||
logger.info(f"Shutting down MCP server on port {port}")
|
||||
server_instance.should_exit = True
|
||||
time.sleep(0.5)
|
||||
|
||||
# Force shutdown if still running
|
||||
if server_thread.is_alive():
|
||||
try:
|
||||
logger.info("Force shutting down server thread")
|
||||
if hasattr(server_instance, "servers") and server_instance.servers:
|
||||
for srv in server_instance.servers:
|
||||
srv.close()
|
||||
|
@ -254,9 +276,9 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
|
|||
# Wait for graceful shutdown
|
||||
server_thread.join(timeout=3)
|
||||
if server_thread.is_alive():
|
||||
print("Warning: Server thread still alive after shutdown attempt")
|
||||
logger.warning("Server thread still alive after shutdown attempt")
|
||||
except Exception as e:
|
||||
print(f"Error during server shutdown: {e}")
|
||||
logger.error(f"Error during server shutdown: {e}")
|
||||
|
||||
# CRITICAL: Reset SSE global state to prevent event loop contamination
|
||||
# Reset the SSE AppStatus singleton that stores anyio.Event objects
|
||||
|
|
|
@ -1,76 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
import pytest
|
||||
|
||||
from ..test_cases.test_case import TestCase
|
||||
|
||||
|
||||
def skip_if_provider_doesnt_support_batch_inference(client_with_models, model_id):
|
||||
models = {m.identifier: m for m in client_with_models.models.list()}
|
||||
models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
|
||||
provider_id = models[model_id].provider_id
|
||||
providers = {p.provider_id: p for p in client_with_models.providers.list()}
|
||||
provider = providers[provider_id]
|
||||
if provider.provider_type not in ("inline::meta-reference",):
|
||||
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support batch inference")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"test_case",
|
||||
[
|
||||
"inference:completion:batch_completion",
|
||||
],
|
||||
)
|
||||
def test_batch_completion_non_streaming(client_with_models, text_model_id, test_case):
|
||||
skip_if_provider_doesnt_support_batch_inference(client_with_models, text_model_id)
|
||||
tc = TestCase(test_case)
|
||||
|
||||
content_batch = tc["contents"]
|
||||
response = client_with_models.inference.batch_completion(
|
||||
content_batch=content_batch,
|
||||
model_id=text_model_id,
|
||||
sampling_params={
|
||||
"max_tokens": 50,
|
||||
},
|
||||
)
|
||||
assert len(response.batch) == len(content_batch)
|
||||
for i, r in enumerate(response.batch):
|
||||
print(f"response {i}: {r.content}")
|
||||
assert len(r.content) > 10
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"test_case",
|
||||
[
|
||||
"inference:chat_completion:batch_completion",
|
||||
],
|
||||
)
|
||||
def test_batch_chat_completion_non_streaming(client_with_models, text_model_id, test_case):
|
||||
skip_if_provider_doesnt_support_batch_inference(client_with_models, text_model_id)
|
||||
tc = TestCase(test_case)
|
||||
qa_pairs = tc["qa_pairs"]
|
||||
|
||||
message_batch = [
|
||||
[
|
||||
{
|
||||
"role": "user",
|
||||
"content": qa["question"],
|
||||
}
|
||||
]
|
||||
for qa in qa_pairs
|
||||
]
|
||||
|
||||
response = client_with_models.inference.batch_chat_completion(
|
||||
messages_batch=message_batch,
|
||||
model_id=text_model_id,
|
||||
)
|
||||
assert len(response.batch) == len(qa_pairs)
|
||||
for i, r in enumerate(response.batch):
|
||||
print(f"response {i}: {r.completion_message.content}")
|
||||
assert len(r.completion_message.content) > 0
|
||||
assert qa_pairs[i]["answer"].lower() in r.completion_message.content.lower()
|
|
@ -1,303 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
#
|
||||
# Test plan:
|
||||
#
|
||||
# Types of input:
|
||||
# - array of a string
|
||||
# - array of a image (ImageContentItem, either URL or base64 string)
|
||||
# - array of a text (TextContentItem)
|
||||
# Types of output:
|
||||
# - list of list of floats
|
||||
# Params:
|
||||
# - text_truncation
|
||||
# - absent w/ long text -> error
|
||||
# - none w/ long text -> error
|
||||
# - absent w/ short text -> ok
|
||||
# - none w/ short text -> ok
|
||||
# - end w/ long text -> ok
|
||||
# - end w/ short text -> ok
|
||||
# - start w/ long text -> ok
|
||||
# - start w/ short text -> ok
|
||||
# - output_dimension
|
||||
# - response dimension matches
|
||||
# - task_type, only for asymmetric models
|
||||
# - query embedding != passage embedding
|
||||
# Negative:
|
||||
# - long string
|
||||
# - long text
|
||||
#
|
||||
# Todo:
|
||||
# - negative tests
|
||||
# - empty
|
||||
# - empty list
|
||||
# - empty string
|
||||
# - empty text
|
||||
# - empty image
|
||||
# - long
|
||||
# - large image
|
||||
# - appropriate combinations
|
||||
# - batch size
|
||||
# - many inputs
|
||||
# - invalid
|
||||
# - invalid URL
|
||||
# - invalid base64
|
||||
#
|
||||
# Notes:
|
||||
# - use llama_stack_client fixture
|
||||
# - use pytest.mark.parametrize when possible
|
||||
# - no accuracy tests: only check the type of output, not the content
|
||||
#
|
||||
|
||||
import pytest
|
||||
from llama_stack_client import BadRequestError as LlamaStackBadRequestError
|
||||
from llama_stack_client.types import EmbeddingsResponse
|
||||
from llama_stack_client.types.shared.interleaved_content import (
|
||||
ImageContentItem,
|
||||
ImageContentItemImage,
|
||||
ImageContentItemImageURL,
|
||||
TextContentItem,
|
||||
)
|
||||
from openai import BadRequestError as OpenAIBadRequestError
|
||||
|
||||
from llama_stack.core.library_client import LlamaStackAsLibraryClient
|
||||
|
||||
DUMMY_STRING = "hello"
|
||||
DUMMY_STRING2 = "world"
|
||||
DUMMY_LONG_STRING = "NVDA " * 10240
|
||||
DUMMY_TEXT = TextContentItem(text=DUMMY_STRING, type="text")
|
||||
DUMMY_TEXT2 = TextContentItem(text=DUMMY_STRING2, type="text")
|
||||
DUMMY_LONG_TEXT = TextContentItem(text=DUMMY_LONG_STRING, type="text")
|
||||
# TODO(mf): add a real image URL and base64 string
|
||||
DUMMY_IMAGE_URL = ImageContentItem(
|
||||
image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image"
|
||||
)
|
||||
DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image")
|
||||
SUPPORTED_PROVIDERS = {"remote::nvidia"}
|
||||
MODELS_SUPPORTING_MEDIA = {}
|
||||
MODELS_SUPPORTING_OUTPUT_DIMENSION = {"nvidia/llama-3.2-nv-embedqa-1b-v2"}
|
||||
MODELS_REQUIRING_TASK_TYPE = {
|
||||
"nvidia/llama-3.2-nv-embedqa-1b-v2",
|
||||
"nvidia/nv-embedqa-e5-v5",
|
||||
"nvidia/nv-embedqa-mistral-7b-v2",
|
||||
"snowflake/arctic-embed-l",
|
||||
}
|
||||
MODELS_SUPPORTING_TASK_TYPE = MODELS_REQUIRING_TASK_TYPE
|
||||
|
||||
|
||||
def default_task_type(model_id):
|
||||
"""
|
||||
Some models require a task type parameter. This provides a default value for
|
||||
testing those models.
|
||||
"""
|
||||
if model_id in MODELS_REQUIRING_TASK_TYPE:
|
||||
return {"task_type": "query"}
|
||||
return {}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"contents",
|
||||
[
|
||||
[DUMMY_STRING, DUMMY_STRING2],
|
||||
[DUMMY_TEXT, DUMMY_TEXT2],
|
||||
],
|
||||
ids=[
|
||||
"list[string]",
|
||||
"list[text]",
|
||||
],
|
||||
)
|
||||
def test_embedding_text(llama_stack_client, embedding_model_id, contents, inference_provider_type):
|
||||
if inference_provider_type not in SUPPORTED_PROVIDERS:
|
||||
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
|
||||
response = llama_stack_client.inference.embeddings(
|
||||
model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
|
||||
)
|
||||
assert isinstance(response, EmbeddingsResponse)
|
||||
assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
|
||||
assert isinstance(response.embeddings[0], list)
|
||||
assert isinstance(response.embeddings[0][0], float)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"contents",
|
||||
[
|
||||
[DUMMY_IMAGE_URL, DUMMY_IMAGE_BASE64],
|
||||
[DUMMY_IMAGE_URL, DUMMY_STRING, DUMMY_IMAGE_BASE64, DUMMY_TEXT],
|
||||
],
|
||||
ids=[
|
||||
"list[url,base64]",
|
||||
"list[url,string,base64,text]",
|
||||
],
|
||||
)
|
||||
def test_embedding_image(llama_stack_client, embedding_model_id, contents, inference_provider_type):
|
||||
if inference_provider_type not in SUPPORTED_PROVIDERS:
|
||||
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
|
||||
if embedding_model_id not in MODELS_SUPPORTING_MEDIA:
|
||||
pytest.xfail(f"{embedding_model_id} doesn't support media")
|
||||
response = llama_stack_client.inference.embeddings(
|
||||
model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
|
||||
)
|
||||
assert isinstance(response, EmbeddingsResponse)
|
||||
assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
|
||||
assert isinstance(response.embeddings[0], list)
|
||||
assert isinstance(response.embeddings[0][0], float)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"text_truncation",
|
||||
[
|
||||
"end",
|
||||
"start",
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"contents",
|
||||
[
|
||||
[DUMMY_LONG_TEXT],
|
||||
[DUMMY_STRING],
|
||||
],
|
||||
ids=[
|
||||
"long",
|
||||
"short",
|
||||
],
|
||||
)
|
||||
def test_embedding_truncation(
|
||||
llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
|
||||
):
|
||||
if inference_provider_type not in SUPPORTED_PROVIDERS:
|
||||
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
|
||||
response = llama_stack_client.inference.embeddings(
|
||||
model_id=embedding_model_id,
|
||||
contents=contents,
|
||||
text_truncation=text_truncation,
|
||||
**default_task_type(embedding_model_id),
|
||||
)
|
||||
assert isinstance(response, EmbeddingsResponse)
|
||||
assert len(response.embeddings) == 1
|
||||
assert isinstance(response.embeddings[0], list)
|
||||
assert isinstance(response.embeddings[0][0], float)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "text_truncation",
    [
        None,
        "none",
    ],
)
@pytest.mark.parametrize(
    "contents",
    [
        [DUMMY_LONG_TEXT],
        [DUMMY_LONG_STRING],
    ],
    ids=[
        "long-text",
        "long-str",
    ],
)
def test_embedding_truncation_error(
    llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
):
    """Check that over-long input is rejected when truncation is disabled.

    Parametrized over ``text_truncation`` (``None`` / ``"none"``) and over two
    long payloads. The request is expected to raise a bad-request error.
    """
    if inference_provider_type not in SUPPORTED_PROVIDERS:
        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
    # Using LlamaStackClient from llama_stack_client will raise llama_stack_client.BadRequestError
    # While using LlamaStackAsLibraryClient from llama_stack.distribution.library_client will raise the error that the backend raises
    error_type = (
        OpenAIBadRequestError
        if isinstance(llama_stack_client, LlamaStackAsLibraryClient)
        else LlamaStackBadRequestError
    )
    with pytest.raises(error_type):
        llama_stack_client.inference.embeddings(
            model_id=embedding_model_id,
            # Send the parametrized payload; a hard-coded value here would make
            # the "long-str" case a duplicate of "long-text".
            contents=contents,
            text_truncation=text_truncation,
            **default_task_type(embedding_model_id),
        )
|
||||
|
||||
|
||||
def test_embedding_output_dimension(llama_stack_client, embedding_model_id, inference_provider_type):
    """Check that ``output_dimension=32`` yields a 32-element vector that differs in size from the default."""
    if inference_provider_type not in SUPPORTED_PROVIDERS:
        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
    if embedding_model_id not in MODELS_SUPPORTING_OUTPUT_DIMENSION:
        pytest.xfail(f"{embedding_model_id} doesn't support output_dimension")

    embed = llama_stack_client.inference.embeddings
    requested_dimension = 32

    default_response = embed(
        model_id=embedding_model_id,
        contents=[DUMMY_STRING],
        **default_task_type(embedding_model_id),
    )
    resized_response = embed(
        model_id=embedding_model_id,
        contents=[DUMMY_STRING],
        **default_task_type(embedding_model_id),
        output_dimension=requested_dimension,
    )

    default_size = len(default_response.embeddings[0])
    resized_size = len(resized_response.embeddings[0])
    assert default_size != resized_size
    assert resized_size == 32
|
||||
|
||||
|
||||
def test_embedding_task_type(llama_stack_client, embedding_model_id, inference_provider_type):
    """Check that "query" and "document" task types give different embeddings for the same text."""
    if inference_provider_type not in SUPPORTED_PROVIDERS:
        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
    if embedding_model_id not in MODELS_SUPPORTING_TASK_TYPE:
        pytest.xfail(f"{embedding_model_id} doesn't support task_type")

    # Requests are issued in order: "query" first, then "document".
    by_task = {
        task: llama_stack_client.inference.embeddings(
            model_id=embedding_model_id, contents=[DUMMY_STRING], task_type=task
        )
        for task in ("query", "document")
    }
    assert by_task["query"].embeddings != by_task["document"].embeddings
|
||||
|
||||
|
||||
@pytest.mark.parametrize("text_truncation", [None, "none", "end", "start"])
def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation, inference_provider_type):
    """Check that each accepted ``text_truncation`` value works for a short input."""
    if inference_provider_type not in SUPPORTED_PROVIDERS:
        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")

    task_kwargs = default_task_type(embedding_model_id)
    result = llama_stack_client.inference.embeddings(
        model_id=embedding_model_id,
        contents=[DUMMY_STRING],
        text_truncation=text_truncation,
        **task_kwargs,
    )

    assert isinstance(result, EmbeddingsResponse)
    vectors = result.embeddings
    assert len(vectors) == 1
    first_vector = vectors[0]
    assert isinstance(first_vector, list)
    assert isinstance(first_vector[0], float)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("text_truncation", ["NONE", "END", "START", "left", "right"])
def test_embedding_text_truncation_error(
    llama_stack_client, embedding_model_id, text_truncation, inference_provider_type
):
    """Check that each parametrized ``text_truncation`` value makes the request raise.

    The library client is expected to raise ``ValueError``; any other client
    is expected to raise ``LlamaStackBadRequestError``.
    """
    if inference_provider_type not in SUPPORTED_PROVIDERS:
        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")

    if isinstance(llama_stack_client, LlamaStackAsLibraryClient):
        expected_error = ValueError
    else:
        expected_error = LlamaStackBadRequestError

    with pytest.raises(expected_error):
        llama_stack_client.inference.embeddings(
            model_id=embedding_model_id,
            contents=[DUMMY_STRING],
            text_truncation=text_truncation,
            **default_task_type(embedding_model_id),
        )
|
|
@ -9,6 +9,7 @@ import time
|
|||
import unicodedata
|
||||
|
||||
import pytest
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ..test_cases.test_case import TestCase
|
||||
|
||||
|
@ -62,6 +63,14 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
|
|||
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
|
||||
|
||||
|
||||
def skip_if_doesnt_support_completions_logprobs(client_with_models, model_id):
    """Skip the current test when the model's provider does not honor /v1/completions logprobs."""
    providers_without_logprobs = (
        "remote::ollama",  # logprobs is ignored
    )
    provider_type = provider_from_model(client_with_models, model_id).provider_type
    if provider_type not in providers_without_logprobs:
        return
    pytest.skip(f"Model {model_id} hosted by {provider_type} doesn't support /v1/completions logprobs.")
|
||||
|
||||
|
||||
def skip_if_model_doesnt_support_suffix(client_with_models, model_id):
|
||||
# To test `fim` ( fill in the middle ) completion, we need to use a model that supports suffix.
|
||||
# Use this to specifically test this API functionality.
|
||||
|
@ -205,28 +214,6 @@ def test_openai_completion_streaming(llama_stack_client, client_with_models, tex
|
|||
assert len(content_str) > 10
|
||||
|
||||
|
||||
@pytest.mark.parametrize("prompt_logprobs", [1, 0])
def test_openai_completion_prompt_logprobs(llama_stack_client, client_with_models, text_model_id, prompt_logprobs):
    """Check that a non-streaming completion returns prompt logprobs for the first choice.

    Guarded by ``skip_if_provider_isnt_vllm``.
    """
    skip_if_provider_isnt_vllm(client_with_models, text_model_id)

    completion = llama_stack_client.completions.create(
        model=text_model_id,
        prompt="Hello, world!",
        stream=False,
        prompt_logprobs=prompt_logprobs,
    )
    assert len(completion.choices) > 0
    first_choice = completion.choices[0]
    assert len(first_choice.prompt_logprobs) > 0
|
||||
|
||||
|
||||
def test_openai_completion_guided_choice(llama_stack_client, client_with_models, text_model_id):
|
||||
skip_if_provider_isnt_vllm(client_with_models, text_model_id)
|
||||
|
||||
|
@ -518,3 +505,214 @@ def test_openai_chat_completion_non_streaming_with_file(openai_client, client_wi
|
|||
message_content = response.choices[0].message.content.lower().strip()
|
||||
normalized_content = _normalize_text(message_content)
|
||||
assert "hello world" in normalized_content
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:completion:stop_sequence"])
def test_openai_completion_stop_sequence(client_with_models, openai_client, text_model_id, test_case):
    """Check that the stop string "1963" never appears in the completion text.

    The request is made twice: once with ``stop`` as a single string and once
    as a list of strings.
    """
    skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)

    tc = TestCase(test_case)

    for stop_value in ("1963", ["blathering", "1963"]):
        completion = openai_client.completions.create(
            model=text_model_id,
            prompt=tc["content"],
            stop=stop_value,
            stream=False,
        )
        assert len(completion.choices) > 0
        first_choice = completion.choices[0]
        assert "1963" not in first_choice.text
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:completion:log_probs"])
def test_openai_completion_logprobs(client_with_models, openai_client, text_model_id, test_case):
    """Check the shape and consistency of logprobs on a non-streaming completion.

    With ``logprobs=5`` every generated token must have exactly five top
    candidates, and the token's own logprob must match its entry there.
    """
    skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
    skip_if_doesnt_support_completions_logprobs(client_with_models, text_model_id)

    tc = TestCase(test_case)

    completion = openai_client.completions.create(
        model=text_model_id,
        prompt=tc["content"],
        logprobs=5,
    )
    assert len(completion.choices) > 0
    first_choice = completion.choices[0]
    assert first_choice.text, "Response text should not be empty"
    assert first_choice.logprobs, "Logprobs should not be empty"

    lp = first_choice.logprobs
    assert lp.token_logprobs, "Response tokens should not be empty"
    assert len(lp.tokens) == len(lp.token_logprobs)
    assert len(lp.token_logprobs) == len(lp.top_logprobs)
    # The three lists are parallel (lengths asserted above), so walk them together.
    for token, prob, candidates in zip(lp.tokens, lp.token_logprobs, lp.top_logprobs, strict=True):
        assert candidates[token] == prob
        assert len(candidates) == 5
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "test_case",
    [
        "inference:completion:log_probs",
    ],
)
def test_openai_completion_logprobs_streaming(client_with_models, openai_client, text_model_id, test_case):
    """Check logprobs on every chunk of a streaming completion.

    With ``logprobs=3``, a chunk that carries text must carry logprobs whose
    parallel lists agree in length, with exactly three top candidates per
    token. A chunk without text must carry no logprobs.
    """
    skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
    skip_if_doesnt_support_completions_logprobs(client_with_models, text_model_id)

    tc = TestCase(test_case)

    response = openai_client.completions.create(
        model=text_model_id,
        prompt=tc["content"],
        logprobs=3,
        stream=True,
        max_tokens=5,
    )
    for chunk in response:
        # Inspect the choice from this chunk; `response` is the stream being
        # iterated, so the per-chunk choice must not be replaced from it.
        choice = chunk.choices[0]
        if choice.text:  # if there's a token, we expect logprobs
            assert choice.logprobs, "Logprobs should not be empty"
            logprobs = choice.logprobs
            assert logprobs.token_logprobs, "Response tokens should not be empty"
            assert len(logprobs.tokens) == len(logprobs.token_logprobs)
            assert len(logprobs.token_logprobs) == len(logprobs.top_logprobs)
            for i, (token, prob) in enumerate(zip(logprobs.tokens, logprobs.token_logprobs, strict=True)):
                assert logprobs.top_logprobs[i][token] == prob
                assert len(logprobs.top_logprobs[i]) == 3
        else:  # no token, no logprobs
            assert not choice.logprobs, "Logprobs should be empty"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:chat_completion:tool_calling"])
def test_openai_chat_completion_with_tools(openai_client, text_model_id, test_case):
    """Check that a non-streaming chat completion makes exactly one expected tool call."""
    tc = TestCase(test_case)

    completion = openai_client.chat.completions.create(
        model=text_model_id,
        messages=tc["messages"],
        tools=tc["tools"],
        tool_choice="auto",
        stream=False,
    )
    assert len(completion.choices) == 1
    requested_calls = completion.choices[0].message.tool_calls
    assert len(requested_calls) == 1

    call = completion.choices[0].message.tool_calls[0]
    assert call.function.name == tc["tools"][0]["function"]["name"]
    # Arguments are matched by substring, not parsed.
    assert "location" in call.function.arguments
    assert tc["expected"]["location"] in call.function.arguments
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:chat_completion:tool_calling"])
def test_openai_chat_completion_with_tools_and_streaming(openai_client, text_model_id, test_case):
    """Check that tool-call deltas from a streamed chat completion assemble into one expected call."""
    tc = TestCase(test_case)

    stream = openai_client.chat.completions.create(
        model=text_model_id,
        messages=tc["messages"],
        tools=tc["tools"],
        tool_choice="auto",
        stream=True,
    )

    # Accumulate tool calls from streaming chunks, keyed by position in each chunk's delta list.
    assembled = []
    for chunk in stream:
        if not (chunk.choices and chunk.choices[0].delta.tool_calls):
            continue
        for position, fragment in enumerate(chunk.choices[0].delta.tool_calls):
            # Grow the accumulator until it has a slot for this position.
            missing = position + 1 - len(assembled)
            assembled.extend({"function": {"name": "", "arguments": ""}} for _ in range(missing))
            slot = assembled[position]["function"]
            if fragment.function and fragment.function.name:
                slot["name"] = fragment.function.name
            if fragment.function and fragment.function.arguments:
                slot["arguments"] += fragment.function.arguments

    assert len(assembled) == 1
    call = assembled[0]
    assert call["function"]["name"] == tc["tools"][0]["function"]["name"]
    assert "location" in call["function"]["arguments"]
    assert tc["expected"]["location"] in call["function"]["arguments"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:chat_completion:tool_calling"])
def test_openai_chat_completion_with_tool_choice_none(openai_client, text_model_id, test_case):
    """Check that ``tool_choice="none"`` yields no tool calls even though tools are supplied."""
    tc = TestCase(test_case)

    completion = openai_client.chat.completions.create(
        model=text_model_id,
        messages=tc["messages"],
        tools=tc["tools"],
        tool_choice="none",
        stream=False,
    )
    assert len(completion.choices) == 1
    requested_calls = completion.choices[0].message.tool_calls
    # Either a missing field or an empty list counts as "no tool calls".
    assert requested_calls is None or len(requested_calls) == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:chat_completion:structured_output"])
def test_openai_chat_completion_structured_output(openai_client, text_model_id, test_case):
    """Check that a ``json_schema`` response format produces JSON matching the expected answer."""

    # Note: Skip condition may need adjustment for OpenAI client
    class AnswerFormat(BaseModel):
        first_name: str
        last_name: str
        year_of_birth: int

    tc = TestCase(test_case)

    schema_spec = {
        "name": "AnswerFormat",
        "schema": AnswerFormat.model_json_schema(),
    }
    completion = openai_client.chat.completions.create(
        model=text_model_id,
        messages=tc["messages"],
        response_format={"type": "json_schema", "json_schema": schema_spec},
        stream=False,
    )
    print(completion.choices[0].message.content)
    parsed = AnswerFormat.model_validate_json(completion.choices[0].message.content)

    expected = tc["expected"]
    assert parsed.first_name == expected["first_name"]
    assert parsed.last_name == expected["last_name"]
    assert parsed.year_of_birth == expected["year_of_birth"]
|
||||
|
|
77
tests/integration/inference/test_openai_vision_inference.py
Normal file
77
tests/integration/inference/test_openai_vision_inference.py
Normal file
|
@ -0,0 +1,77 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
import base64
|
||||
import pathlib
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def image_path():
    """Return the path of ``dog.png`` located next to this test module."""
    this_file = pathlib.Path(__file__)
    return this_file.with_name("dog.png")
|
||||
|
||||
|
||||
@pytest.fixture
def base64_image_data(image_path):
    """Return the bytes of the file at ``image_path`` as a base64-encoded UTF-8 string."""
    raw_bytes = image_path.read_bytes()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
|
||||
|
||||
|
||||
async def test_openai_chat_completion_image_url(openai_client, vision_model_id):
    """Check that the reply mentions a dog when the image is passed by URL."""
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
        },
    }
    text_part = {
        "type": "text",
        "text": "Describe what is in this image.",
    }
    message = {"role": "user", "content": [image_part, text_part]}

    response = openai_client.chat.completions.create(
        model=vision_model_id,
        messages=[message],
        stream=False,
    )

    description = response.choices[0].message.content.lower().strip()
    assert len(description) > 0
    assert any(expected in description for expected in {"dog", "puppy", "pup"})
|
||||
|
||||
|
||||
async def test_openai_chat_completion_image_data(openai_client, vision_model_id, base64_image_data):
    """Check that the reply mentions a dog when the image is passed as a base64 data URL."""
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/png;base64,{base64_image_data}",
        },
    }
    text_part = {
        "type": "text",
        "text": "Describe what is in this image.",
    }
    message = {"role": "user", "content": [image_part, text_part]}

    response = openai_client.chat.completions.create(
        model=vision_model_id,
        messages=[message],
        stream=False,
    )

    description = response.choices[0].message.content.lower().strip()
    assert len(description) > 0
    assert any(expected in description for expected in {"dog", "puppy", "pup"})
|
|
@ -1,545 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from time import sleep
|
||||
|
||||
import pytest
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.models.llama.sku_list import resolve_model
|
||||
|
||||
from ..test_cases.test_case import TestCase
|
||||
|
||||
# Provider types for which the log-prob tests in this module run; any other
# provider type is marked xfail by those tests.
PROVIDER_LOGPROBS_TOP_K = {"remote::together", "remote::fireworks", "remote::vllm"}
|
||||
|
||||
|
||||
def skip_if_model_doesnt_support_completion(client_with_models, model_id):
    """Skip the current test when the model's provider has no completion support.

    Args:
        client_with_models: Client exposing ``models.list()`` and ``providers.list()``.
        model_id: Either a model identifier or a provider resource id.

    Raises:
        KeyError: If ``model_id`` or its provider is not registered.
    """
    # Fetch the model list once and index it under both names. Entries keyed
    # by provider_resource_id are applied last so they win on a key collision.
    registered = list(client_with_models.models.list())
    models = {m.identifier: m for m in registered}
    models.update({m.provider_resource_id: m for m in registered})
    provider_id = models[model_id].provider_id
    providers = {p.provider_id: p for p in client_with_models.providers.list()}
    provider = providers[provider_id]
    if (
        provider.provider_type
        in (
            "remote::openai",
            "remote::anthropic",
            "remote::gemini",
            "remote::vertexai",
            "remote::groq",
            "remote::sambanova",
            "remote::azure",
        )
        or "openai-compat" in provider.provider_type
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support completion")
|
||||
|
||||
|
||||
def skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, model_id):
    """Skip the current test when the model's provider lacks json_schema structured output.

    Args:
        client_with_models: Client exposing ``models.list()`` and ``providers.list()``.
        model_id: Either a model identifier or a provider resource id.

    Raises:
        KeyError: If ``model_id`` or its provider is not registered.
    """
    # Fetch the model list once and index it under both names. Entries keyed
    # by provider_resource_id are applied last so they win on a key collision.
    registered = list(client_with_models.models.list())
    models = {m.identifier: m for m in registered}
    models.update({m.provider_resource_id: m for m in registered})
    provider_id = models[model_id].provider_id
    providers = {p.provider_id: p for p in client_with_models.providers.list()}
    provider = providers[provider_id]
    if provider.provider_type in ("remote::sambanova", "remote::azure", "remote::watsonx"):
        pytest.skip(
            f"Model {model_id} hosted by {provider.provider_type} doesn't support json_schema structured output"
        )
|
||||
|
||||
|
||||
def get_llama_model(client_with_models, model_id):
    """Return a name for ``model_id`` that ``resolve_model`` accepts.

    The model's identifier is tried first, then its provider resource id. If
    neither resolves, the ``"llama_model"`` metadata entry is returned, which
    is ``None`` when absent. Asserts that ``model_id`` is registered.
    """
    by_name = {}
    for entry in client_with_models.models.list():
        by_name.update({entry.identifier: entry, entry.provider_resource_id: entry})

    assert model_id in by_name, f"Model {model_id} not found"

    model = by_name[model_id]
    for candidate in (model.identifier, model.provider_resource_id):
        if resolve_model(candidate):
            return candidate

    return model.metadata.get("llama_model")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:completion:sanity"])
def test_text_completion_non_streaming(client_with_models, text_model_id, test_case):
    """Check that a non-streaming completion returns more than ten characters of content."""
    skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
    tc = TestCase(test_case)

    sampling = {"max_tokens": 50}
    result = client_with_models.inference.completion(
        content=tc["content"],
        stream=False,
        model_id=text_model_id,
        sampling_params=sampling,
    )
    assert len(result.content) > 10
    # assert "blue" in response.content.lower().strip()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:completion:sanity"])
def test_text_completion_streaming(client_with_models, text_model_id, test_case):
    """Check that the joined deltas of a streaming completion exceed ten characters."""
    skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
    tc = TestCase(test_case)

    stream = client_with_models.inference.completion(
        content=tc["content"],
        stream=True,
        model_id=text_model_id,
        sampling_params={"max_tokens": 50},
    )
    joined = "".join(chunk.delta for chunk in stream)
    content_str = joined.lower().strip()
    # assert "blue" in content_str
    assert len(content_str) > 10
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:completion:stop_sequence"])
def test_text_completion_stop_sequence(client_with_models, text_model_id, inference_provider_type, test_case):
    """Check that the stop sequence "1963" does not appear in the streamed completion."""
    skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
    # This is only supported/tested for remote vLLM: https://github.com/meta-llama/llama-stack/issues/1771
    if inference_provider_type != "remote::vllm":
        pytest.xfail(f"{inference_provider_type} doesn't support 'stop' parameter yet")
    tc = TestCase(test_case)

    sampling = {
        "max_tokens": 50,
        "stop": ["1963"],
    }
    stream = client_with_models.inference.completion(
        content=tc["content"],
        stream=True,
        model_id=text_model_id,
        sampling_params=sampling,
    )
    joined = "".join(chunk.delta for chunk in stream)
    content_str = joined.lower().strip()
    assert "1963" not in content_str
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:completion:log_probs"])
def test_text_completion_log_probs_non_streaming(client_with_models, text_model_id, inference_provider_type, test_case):
    """Check that ``top_k=1`` logprobs are returned, one entry per generated token."""
    skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
    if inference_provider_type not in PROVIDER_LOGPROBS_TOP_K:
        pytest.xfail(f"{inference_provider_type} doesn't support log probs yet")

    tc = TestCase(test_case)

    result = client_with_models.inference.completion(
        content=tc["content"],
        stream=False,
        model_id=text_model_id,
        sampling_params={"max_tokens": 5},
        logprobs={"top_k": 1},
    )
    assert result.logprobs, "Logprobs should not be empty"
    # each token has 1 logprob and here max_tokens=5
    assert 1 <= len(result.logprobs) <= 5
    for token_logprob in result.logprobs:
        assert len(token_logprob.logprobs_by_token) == 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:completion:log_probs"])
def test_text_completion_log_probs_streaming(client_with_models, text_model_id, inference_provider_type, test_case):
    """Check that streamed chunks carry ``top_k=1`` logprobs exactly when they carry a delta."""
    skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
    if inference_provider_type not in PROVIDER_LOGPROBS_TOP_K:
        pytest.xfail(f"{inference_provider_type} doesn't support log probs yet")

    tc = TestCase(test_case)

    stream = client_with_models.inference.completion(
        content=tc["content"],
        stream=True,
        model_id=text_model_id,
        sampling_params={"max_tokens": 5},
        logprobs={"top_k": 1},
    )
    # Drain the stream fully before asserting on any chunk.
    chunks = [*stream]
    for chunk in chunks:
        if not chunk.delta:  # no token, no logprobs
            assert not chunk.logprobs, "Logprobs should be empty"
            continue
        # if there's a token, we expect logprobs
        assert chunk.logprobs, "Logprobs should not be empty"
        assert all(len(logprob.logprobs_by_token) == 1 for logprob in chunk.logprobs)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:completion:structured_output"])
def test_text_completion_structured_output(client_with_models, text_model_id, test_case):
    """Check that a ``json_schema`` completion parses into the expected answer fields."""
    skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
    skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, text_model_id)

    class AnswerFormat(BaseModel):
        name: str
        year_born: str
        year_retired: str

    tc = TestCase(test_case)

    result = client_with_models.inference.completion(
        model_id=text_model_id,
        content=tc["user_input"],
        stream=False,
        sampling_params={"max_tokens": 50},
        response_format={
            "type": "json_schema",
            "json_schema": AnswerFormat.model_json_schema(),
        },
    )
    parsed = AnswerFormat.model_validate_json(result.content)

    expected = tc["expected"]
    assert parsed.name == expected["name"]
    assert parsed.year_born == expected["year_born"]
    assert parsed.year_retired == expected["year_retired"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "test_case",
    ["inference:chat_completion:non_streaming_01", "inference:chat_completion:non_streaming_02"],
)
def test_text_chat_completion_non_streaming(client_with_models, text_model_id, test_case):
    """Check that a non-streaming chat reply contains the expected text, case-insensitively."""
    tc = TestCase(test_case)
    question, expected = tc["question"], tc["expected"]

    user_message = {
        "role": "user",
        "content": question,
    }
    result = client_with_models.inference.chat_completion(
        model_id=text_model_id,
        messages=[user_message],
        stream=False,
    )
    reply = result.completion_message.content.lower().strip()
    assert len(reply) > 0
    assert expected.lower() in reply
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "test_case",
    ["inference:chat_completion:streaming_01", "inference:chat_completion:streaming_02"],
)
def test_text_chat_completion_streaming(client_with_models, text_model_id, test_case):
    """Check that the joined streamed deltas contain the expected text, case-insensitively."""
    tc = TestCase(test_case)
    question, expected = tc["question"], tc["expected"]

    stream = client_with_models.inference.chat_completion(
        model_id=text_model_id,
        messages=[{"role": "user", "content": question}],
        stream=True,
        timeout=120,  # Increase timeout to 2 minutes for large conversation history
    )
    # Each delta is lower-cased and stripped on its own before joining.
    pieces = []
    for chunk in stream:
        pieces.append(str(chunk.event.delta.text.lower().strip()))
    assert len(pieces) > 0
    assert expected.lower() in "".join(pieces)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:chat_completion:tool_calling"])
def test_text_chat_completion_with_tool_calling_and_non_streaming(client_with_models, text_model_id, test_case):
    """Check that a non-streaming chat completion yields exactly one tool call with the expected arguments."""
    tc = TestCase(test_case)

    result = client_with_models.inference.chat_completion(
        model_id=text_model_id,
        messages=tc["messages"],
        tools=tc["tools"],
        tool_choice="auto",
        stream=False,
    )
    reply = result.completion_message
    # some models can return content for the response in addition to the tool call
    assert reply.role == "assistant"

    assert len(reply.tool_calls) == 1
    only_call = reply.tool_calls[0]
    assert only_call.tool_name == tc["tools"][0]["tool_name"]
    assert only_call.arguments == tc["expected"]
|
||||
|
||||
|
||||
def extract_tool_invocation_content(response):
    """Collect successfully parsed tool calls from a stream into one string.

    Chunks whose delta is not a tool call, or whose parse status is not
    "succeeded", are ignored. The result is a plain string so it is easy to
    compare with an expected value,
    e.g. "[get_weather, {'location': 'San Francisco, CA'}]".
    """
    deltas = (chunk.event.delta for chunk in response)
    return "".join(
        f"[{delta.tool_call.tool_name}, {delta.tool_call.arguments}]"
        for delta in deltas
        if delta.type == "tool_call" and delta.parse_status == "succeeded"
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:chat_completion:tool_calling"])
def test_text_chat_completion_with_tool_calling_and_streaming(client_with_models, text_model_id, test_case):
    """Check that a streamed chat completion with ``tool_choice="auto"`` emits the expected tool call."""
    tc = TestCase(test_case)

    stream = client_with_models.inference.chat_completion(
        model_id=text_model_id,
        messages=tc["messages"],
        tools=tc["tools"],
        tool_choice="auto",
        stream=True,
    )
    observed = extract_tool_invocation_content(stream)

    wanted_name = tc["tools"][0]["tool_name"]
    wanted_arguments = tc["expected"]
    assert observed == f"[{wanted_name}, {wanted_arguments}]"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:chat_completion:tool_calling"])
def test_text_chat_completion_with_tool_choice_required(client_with_models, text_model_id, test_case):
    """Check that ``tool_choice="required"`` in ``tool_config`` produces the expected streamed tool call."""
    tc = TestCase(test_case)

    stream = client_with_models.inference.chat_completion(
        model_id=text_model_id,
        messages=tc["messages"],
        tools=tc["tools"],
        tool_config={"tool_choice": "required"},
        stream=True,
    )
    observed = extract_tool_invocation_content(stream)

    wanted_name = tc["tools"][0]["tool_name"]
    wanted_arguments = tc["expected"]
    assert observed == f"[{wanted_name}, {wanted_arguments}]"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:chat_completion:tool_calling"])
def test_text_chat_completion_with_tool_choice_none(client_with_models, text_model_id, test_case):
    """Check that ``tool_choice="none"`` in ``tool_config`` produces no streamed tool calls."""
    tc = TestCase(test_case)

    no_tools_config = {"tool_choice": "none"}
    stream = client_with_models.inference.chat_completion(
        model_id=text_model_id,
        messages=tc["messages"],
        tools=tc["tools"],
        tool_config=no_tools_config,
        stream=True,
    )
    observed = extract_tool_invocation_content(stream)
    assert observed == ""
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", ["inference:chat_completion:structured_output"])
def test_text_chat_completion_structured_output(client_with_models, text_model_id, test_case):
    """Check that a ``json_schema`` chat completion parses into the expected nested answer."""
    skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, text_model_id)

    class NBAStats(BaseModel):
        year_for_draft: int
        num_seasons_in_nba: int

    class AnswerFormat(BaseModel):
        first_name: str
        last_name: str
        year_of_birth: int
        nba_stats: NBAStats

    tc = TestCase(test_case)

    schema_format = {
        "type": "json_schema",
        "json_schema": AnswerFormat.model_json_schema(),
    }
    result = client_with_models.inference.chat_completion(
        model_id=text_model_id,
        messages=tc["messages"],
        response_format=schema_format,
        stream=False,
    )
    parsed = AnswerFormat.model_validate_json(result.completion_message.content)

    expected = tc["expected"]
    assert parsed.first_name == expected["first_name"]
    assert parsed.last_name == expected["last_name"]
    assert parsed.year_of_birth == expected["year_of_birth"]
    # The expected stats are stored flat, while the model nests them under nba_stats.
    assert parsed.nba_stats.num_seasons_in_nba == expected["num_seasons_in_nba"]
    assert parsed.nba_stats.year_for_draft == expected["year_for_draft"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("streaming", [True, False])
@pytest.mark.parametrize(
    "test_case",
    ["inference:chat_completion:tool_calling_tools_absent"],
)
def test_text_chat_completion_tool_calling_tools_not_in_request(
    client_with_models, text_model_id, test_case, streaming
):
    """Run the ``tool_calling_tools_absent`` case in streaming and non-streaming mode.

    Any tool call that parses successfully must target
    ``get_object_namespace_list``; a tool call that fails to parse must be
    surfaced as a non-empty raw string.
    """
    case = TestCase(test_case)

    # TODO: more dynamic lookup on tool_prompt_format for model family
    if "3.1" in text_model_id:
        tool_prompt_format = "json"
    else:
        tool_prompt_format = "python_list"

    response = client_with_models.inference.chat_completion(
        model_id=text_model_id,
        messages=case["messages"],
        tools=case["tools"],
        tool_choice="auto",
        tool_prompt_format=tool_prompt_format,
        stream=streaming,
    )

    if not streaming:
        for call in response.completion_message.tool_calls:
            assert call.tool_name == "get_object_namespace_list"
        return

    for chunk in response:
        delta = chunk.event.delta
        if delta.type != "tool_call":
            continue
        if delta.parse_status == "succeeded":
            assert delta.tool_call.tool_name == "get_object_namespace_list"
        if delta.parse_status == "failed":
            # expect raw message that failed to parse in tool_call
            assert isinstance(delta.tool_call, str)
            assert len(delta.tool_call) > 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "test_case",
    [
        # Tests if the model can handle simple messages like "Hi" or
        # a message unrelated to one of the tool calls
        "inference:chat_completion:text_then_tool",
        # Tests if the model can do full tool call with responses correctly
        "inference:chat_completion:tool_then_answer",
        # Tests if model can generate multiple params and
        # read outputs correctly
        "inference:chat_completion:array_parameter",
    ],
)
def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, text_model_id, test_case):
    """Drive a multi-turn conversation and check every assistant reply.

    Each turn sends the accumulated history, then compares the reply with the
    next entry of the test case's ``expected`` list: either the tool call
    (name and arguments) or a substring of the lower-cased text answer. After
    a tool call, the next canned tool response is appended to the history.
    """
    model_name = text_model_id.lower()
    if not any(marker in model_name for marker in ("llama-4", "llama4")):
        pytest.xfail("Not tested for non-llama4 models yet")

    case = TestCase(test_case)
    history = []

    def last_message_is_tool_response():
        return len(history) > 0 and history[-1]["role"] == "tool"

    # Keep going while either
    #   1. there are still scripted messages to send, or
    #   2. no scripted messages are left but the last message is a tool
    #      response that the model has not answered yet.
    while len(case["messages"]) > 0 or last_message_is_tool_response():
        # Do not take new scripted messages while a tool response is pending.
        if not last_message_is_tool_response():
            # Each popped entry is added with extend, so it is expected to be
            # a list of messages for one turn.
            history.extend(case["messages"].pop(0))

        response = client_with_models.inference.chat_completion(
            model_id=text_model_id,
            messages=history,
            tools=case["tools"],
            stream=False,
            sampling_params={
                "strategy": {
                    "type": "top_p",
                    "top_p": 0.9,
                    "temperature": 0.6,
                }
            },
        )
        reply = response.completion_message
        history.append(reply.model_dump())

        assert reply.role == "assistant"
        expected = case["expected"].pop(0)
        expected_calls = expected["num_tool_calls"]
        assert len(reply.tool_calls) == expected_calls

        if expected_calls > 0:
            first_call = reply.tool_calls[0]
            assert first_call.tool_name == expected["tool_name"]
            assert first_call.arguments == expected["tool_arguments"]

            # Feed the canned tool result back as a tool response message.
            tool_response = case["tool_responses"].pop(0)
            history.append(
                {
                    "role": "tool",
                    "call_id": first_call.call_id,
                    "content": tool_response["response"],
                }
            )
        else:
            assert expected["answer"] in reply.content.lower()

        # sleep to avoid rate limit
        sleep(1)
|
|
@ -25,16 +25,19 @@ def base64_image_data(image_path):
|
|||
return base64.b64encode(image_path.read_bytes()).decode("utf-8")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def base64_image_url(base64_image_data):
|
||||
return f"data:image/png;base64,{base64_image_data}"
|
||||
|
||||
|
||||
def test_image_chat_completion_non_streaming(client_with_models, vision_model_id):
|
||||
message = {
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image",
|
||||
"image": {
|
||||
"url": {
|
||||
"uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
|
||||
},
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -43,12 +46,12 @@ def test_image_chat_completion_non_streaming(client_with_models, vision_model_id
|
|||
},
|
||||
],
|
||||
}
|
||||
response = client_with_models.inference.chat_completion(
|
||||
model_id=vision_model_id,
|
||||
response = client_with_models.chat.completions.create(
|
||||
model=vision_model_id,
|
||||
messages=[message],
|
||||
stream=False,
|
||||
)
|
||||
message_content = response.completion_message.content.lower().strip()
|
||||
message_content = response.choices[0].message.content.lower().strip()
|
||||
assert len(message_content) > 0
|
||||
assert any(expected in message_content for expected in {"dog", "puppy", "pup"})
|
||||
|
||||
|
@ -68,8 +71,13 @@ def multi_image_data():
|
|||
return encoded_files
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def multi_image_url(multi_image_data):
|
||||
return [f"data:image/jpeg;base64,{data}" for data in multi_image_data]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("stream", [True, False])
|
||||
def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_data, stream):
|
||||
def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_url, stream):
|
||||
supported_models = ["llama-4", "gpt-4o", "llama4"]
|
||||
if not any(model in vision_model_id.lower() for model in supported_models):
|
||||
pytest.skip(
|
||||
|
@ -81,15 +89,15 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
|
|||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image",
|
||||
"image": {
|
||||
"data": multi_image_data[0],
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": multi_image_url[0],
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "image",
|
||||
"image": {
|
||||
"data": multi_image_data[1],
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": multi_image_url[1],
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -99,17 +107,17 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
|
|||
],
|
||||
},
|
||||
]
|
||||
response = client_with_models.inference.chat_completion(
|
||||
model_id=vision_model_id,
|
||||
response = client_with_models.chat.completions.create(
|
||||
model=vision_model_id,
|
||||
messages=messages,
|
||||
stream=stream,
|
||||
)
|
||||
if stream:
|
||||
message_content = ""
|
||||
for chunk in response:
|
||||
message_content += chunk.event.delta.text
|
||||
message_content += chunk.choices[0].delta.content
|
||||
else:
|
||||
message_content = response.completion_message.content
|
||||
message_content = response.choices[0].message.content
|
||||
assert len(message_content) > 0
|
||||
assert any(expected in message_content.lower().strip() for expected in {"bedroom"}), message_content
|
||||
|
||||
|
@ -125,17 +133,17 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
|
|||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image",
|
||||
"image": {
|
||||
"data": multi_image_data[2],
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": multi_image_data[2],
|
||||
},
|
||||
},
|
||||
{"type": "text", "text": "How about this one?"},
|
||||
],
|
||||
},
|
||||
)
|
||||
response = client_with_models.inference.chat_completion(
|
||||
model_id=vision_model_id,
|
||||
response = client_with_models.chat.completions.create(
|
||||
model=vision_model_id,
|
||||
messages=messages,
|
||||
stream=stream,
|
||||
)
|
||||
|
@ -144,7 +152,7 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
|
|||
for chunk in response:
|
||||
message_content += chunk.event.delta.text
|
||||
else:
|
||||
message_content = response.completion_message.content
|
||||
message_content = response.choices[0].message.content
|
||||
assert len(message_content) > 0
|
||||
assert any(expected in message_content.lower().strip() for expected in {"sword", "shield"}), message_content
|
||||
|
||||
|
@ -154,11 +162,9 @@ def test_image_chat_completion_streaming(client_with_models, vision_model_id):
|
|||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image",
|
||||
"image": {
|
||||
"url": {
|
||||
"uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
|
||||
},
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -167,23 +173,23 @@ def test_image_chat_completion_streaming(client_with_models, vision_model_id):
|
|||
},
|
||||
],
|
||||
}
|
||||
response = client_with_models.inference.chat_completion(
|
||||
model_id=vision_model_id,
|
||||
response = client_with_models.chat.completions.create(
|
||||
model=vision_model_id,
|
||||
messages=[message],
|
||||
stream=True,
|
||||
)
|
||||
streamed_content = ""
|
||||
for chunk in response:
|
||||
streamed_content += chunk.event.delta.text.lower()
|
||||
streamed_content += chunk.choices[0].delta.content.lower()
|
||||
assert len(streamed_content) > 0
|
||||
assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
|
||||
|
||||
|
||||
def test_image_chat_completion_base64(client_with_models, vision_model_id, base64_image_data):
|
||||
def test_image_chat_completion_base64(client_with_models, vision_model_id, base64_image_url):
|
||||
image_spec = {
|
||||
"type": "image",
|
||||
"image": {
|
||||
"data": base64_image_data,
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": base64_image_url,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -197,10 +203,10 @@ def test_image_chat_completion_base64(client_with_models, vision_model_id, base6
|
|||
},
|
||||
],
|
||||
}
|
||||
response = client_with_models.inference.chat_completion(
|
||||
model_id=vision_model_id,
|
||||
response = client_with_models.chat.completions.create(
|
||||
model=vision_model_id,
|
||||
messages=[message],
|
||||
stream=False,
|
||||
)
|
||||
message_content = response.completion_message.content.lower().strip()
|
||||
message_content = response.choices[0].message.content.lower().strip()
|
||||
assert len(message_content) > 0
|
||||
|
|
|
@ -14,6 +14,13 @@ from . import skip_in_github_actions
|
|||
# LLAMA_STACK_CONFIG="nvidia" pytest -v tests/integration/providers/nvidia/test_datastore.py
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def skip_if_no_nvidia_provider(llama_stack_client):
|
||||
provider_types = {p.provider_type for p in llama_stack_client.providers.list() if p.api == "datasetio"}
|
||||
if "remote::nvidia" not in provider_types:
|
||||
pytest.skip("datasetio=remote::nvidia provider not configured, skipping")
|
||||
|
||||
|
||||
# nvidia provider only
|
||||
@skip_in_github_actions
|
||||
@pytest.mark.parametrize(
|
||||
|
|
167
tests/integration/recordings/responses/168daab89068.json
Normal file
167
tests/integration/recordings/responses/168daab89068.json
Normal file
|
@ -0,0 +1,167 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://localhost:11434/api/generate",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"raw": true,
|
||||
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"greet_everyone\",\n \"description\": \"\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"url\"],\n \"properties\": {\n \"url\": {\n \"type\": \"string\",\n \"description\": \"\"\n }\n }\n }\n },\n {\n \"name\": \"get_boiling_point\",\n \"description\": \"\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n \",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"liquid_name\"],\n \"properties\": {\n \"liquid_name\": {\n \"type\": \"string\",\n \"description\": \"\"\n },\n \"celsius\": {\n \"type\": \"boolean\",\n \"description\": \"\",\n \"default\": \"True\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. 
Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[greet_everyone(url=\"world\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
|
||||
"options": {
|
||||
"temperature": 0.0
|
||||
},
|
||||
"stream": true
|
||||
},
|
||||
"endpoint": "/api/generate",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.663224Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "How",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.706706Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": " can",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.751075Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": " I",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.794187Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": " assist",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.837831Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": " you",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.879926Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": " further",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.92182Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "?",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.963339Z",
|
||||
"done": true,
|
||||
"done_reason": "stop",
|
||||
"total_duration": 492973041,
|
||||
"load_duration": 103979375,
|
||||
"prompt_eval_count": 482,
|
||||
"prompt_eval_duration": 87032041,
|
||||
"eval_count": 8,
|
||||
"eval_duration": 300586375,
|
||||
"response": "",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|
31
tests/integration/recordings/responses/1c0a34fa2e0c.json
Normal file
31
tests/integration/recordings/responses/1c0a34fa2e0c.json
Normal file
|
@ -0,0 +1,31 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "accounts/fireworks/models/qwen3-embedding-8b",
|
||||
"input": [],
|
||||
"encoding_format": "float"
|
||||
},
|
||||
"endpoint": "/v1/embeddings",
|
||||
"model": "accounts/fireworks/models/qwen3-embedding-8b"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
|
||||
"__data__": {
|
||||
"data": [],
|
||||
"model": "accounts/fireworks/models/qwen3-embedding-8b",
|
||||
"object": "list",
|
||||
"usage": {
|
||||
"prompt_tokens": 0,
|
||||
"total_tokens": 0,
|
||||
"completion_tokens": 0
|
||||
},
|
||||
"perf_metrics": null
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
4743
tests/integration/recordings/responses/235c36771a8a.json
Normal file
4743
tests/integration/recordings/responses/235c36771a8a.json
Normal file
File diff suppressed because it is too large
Load diff
89
tests/integration/recordings/responses/239f4768f5aa.json
Normal file
89
tests/integration/recordings/responses/239f4768f5aa.json
Normal file
|
@ -0,0 +1,89 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Please give me information about Michael Jordan."
|
||||
}
|
||||
],
|
||||
"response_format": {
|
||||
"type": "json_schema",
|
||||
"json_schema": {
|
||||
"name": "AnswerFormat",
|
||||
"schema": {
|
||||
"properties": {
|
||||
"first_name": {
|
||||
"title": "First Name",
|
||||
"type": "string"
|
||||
},
|
||||
"last_name": {
|
||||
"title": "Last Name",
|
||||
"type": "string"
|
||||
},
|
||||
"year_of_birth": {
|
||||
"title": "Year Of Birth",
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"first_name",
|
||||
"last_name",
|
||||
"year_of_birth"
|
||||
],
|
||||
"title": "AnswerFormat",
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
},
|
||||
"stream": false
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-433",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "{\"first_name\": \"Michael\", \"last_name\": \"Jordan\", \"year_of_birth\": 1963}\n\n \t\t\t\t\t\t\t\t\t\t\t \t\t ",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1758979490,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 31,
|
||||
"prompt_tokens": 60,
|
||||
"total_tokens": 91,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
31
tests/integration/recordings/responses/24e106063719.json
Normal file
31
tests/integration/recordings/responses/24e106063719.json
Normal file
|
@ -0,0 +1,31 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "accounts/fireworks/models/qwen3-embedding-8b",
|
||||
"input": [],
|
||||
"encoding_format": "base64"
|
||||
},
|
||||
"endpoint": "/v1/embeddings",
|
||||
"model": "accounts/fireworks/models/qwen3-embedding-8b"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
|
||||
"__data__": {
|
||||
"data": [],
|
||||
"model": "accounts/fireworks/models/qwen3-embedding-8b",
|
||||
"object": "list",
|
||||
"usage": {
|
||||
"prompt_tokens": 0,
|
||||
"total_tokens": 0,
|
||||
"completion_tokens": 0
|
||||
},
|
||||
"perf_metrics": null
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
316
tests/integration/recordings/responses/25649d730247.json
Normal file
316
tests/integration/recordings/responses/25649d730247.json
Normal file
|
@ -0,0 +1,316 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the name of the Sun in latin?"
|
||||
}
|
||||
],
|
||||
"stream": true
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920389,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "The Latin",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920389,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " name",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920389,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " for",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920389,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " the",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920389,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " Sun",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920389,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " is",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920389,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " \"",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920389,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Sol",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920389,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "\".",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920389,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920389,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 11,
|
||||
"prompt_tokens": 20,
|
||||
"total_tokens": 31,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|
44
tests/integration/recordings/responses/2a5a4e821bc8.json
Normal file
44
tests/integration/recordings/responses/2a5a4e821bc8.json
Normal file
|
@ -0,0 +1,44 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"prompt": "Hello, world!",
|
||||
"logprobs": false,
|
||||
"stream": false,
|
||||
"extra_body": {}
|
||||
},
|
||||
"endpoint": "/v1/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "cmpl-74",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "Hello! How can I assist you today?"
|
||||
}
|
||||
],
|
||||
"created": 1758975636,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 10,
|
||||
"prompt_tokens": 29,
|
||||
"total_tokens": 39,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
2141
tests/integration/recordings/responses/2fef6eda9cd7.json
Normal file
2141
tests/integration/recordings/responses/2fef6eda9cd7.json
Normal file
File diff suppressed because one or more lines are too long
92
tests/integration/recordings/responses/38ea441b5f83.json
Normal file
92
tests/integration/recordings/responses/38ea441b5f83.json
Normal file
|
@ -0,0 +1,92 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Pretend you are a weather assistant."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather like in San Francisco, CA?"
|
||||
}
|
||||
],
|
||||
"stream": false,
|
||||
"tool_choice": "auto",
|
||||
"tools": [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state (both required), e.g. San Francisco, CA."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"location"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-761",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "tool_calls",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_cj8ownwc",
|
||||
"function": {
|
||||
"arguments": "{\"location\":\"San Francisco, CA\"}",
|
||||
"name": "get_weather"
|
||||
},
|
||||
"type": "function",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1758975113,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 18,
|
||||
"prompt_tokens": 185,
|
||||
"total_tokens": 203,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
44
tests/integration/recordings/responses/5b2088233334.json
Normal file
44
tests/integration/recordings/responses/5b2088233334.json
Normal file
|
@ -0,0 +1,44 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"prompt": "Hello, world!",
|
||||
"logprobs": true,
|
||||
"stream": false,
|
||||
"extra_body": {}
|
||||
},
|
||||
"endpoint": "/v1/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "cmpl-809",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "Hello! It's nice to meet you. Is there anything I can help you with or would you like to chat?"
|
||||
}
|
||||
],
|
||||
"created": 1758975633,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 25,
|
||||
"prompt_tokens": 29,
|
||||
"total_tokens": 54,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
550
tests/integration/recordings/responses/651af76045af.json
Normal file
550
tests/integration/recordings/responses/651af76045af.json
Normal file
|
@ -0,0 +1,550 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the name of the US captial?"
|
||||
}
|
||||
],
|
||||
"stream": true
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "The name",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " of",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " the",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " US",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " capital",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " is",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " Washington",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": ",",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " D",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": ".C",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": ".",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " (",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "short",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " for",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " District",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " of",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " Columbia",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": ").",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920398,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 20,
|
||||
"prompt_tokens": 20,
|
||||
"total_tokens": 40,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|
60
tests/integration/recordings/responses/65c12de0a1db.json
Normal file
60
tests/integration/recordings/responses/65c12de0a1db.json
Normal file
|
@ -0,0 +1,60 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Pretend you are a weather assistant."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather like in San Francisco, CA?"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-123",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "Hello! As of my knowledge cutoff on December 15th, I have the latest information for you. However, please note that my data may not be entirely up-to-date.\n\nCurrently, and based on historical climate patterns, it appears to be a partly cloudy day with mild temperatures in San Francisco, CA. Expect a temperature range of around 48\u00b0F (9\u00b0C) to 54\u00b0F (12\u00b0C). It's likely to be a breezy day, with winds blowing at about 13 mph (21 km/h).\n\nHowever, if I were to look into more recent weather patterns or forecasts, I would recommend checking the latest conditions directly from reliable sources such as the National Weather Service or local news outlets for more accurate and up-to-date information.\n\nPlease let me know how I can further assist you.",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1758978071,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 163,
|
||||
"prompt_tokens": 45,
|
||||
"total_tokens": 208,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
7926
tests/integration/recordings/responses/7eace23f03df.json
Normal file
7926
tests/integration/recordings/responses/7eace23f03df.json
Normal file
File diff suppressed because it is too large
Load diff
56
tests/integration/recordings/responses/88ce59013228.json
Normal file
56
tests/integration/recordings/responses/88ce59013228.json
Normal file
|
@ -0,0 +1,56 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "accounts/fireworks/models/qwen3-embedding-8b",
|
||||
"input": "Test dimensions parameter",
|
||||
"encoding_format": "float",
|
||||
"dimensions": 16
|
||||
},
|
||||
"endpoint": "/v1/embeddings",
|
||||
"model": "accounts/fireworks/models/qwen3-embedding-8b"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
|
||||
"__data__": {
|
||||
"data": [
|
||||
{
|
||||
"embedding": [
|
||||
-0.9296875,
|
||||
5.1875,
|
||||
-2.140625,
|
||||
0.171875,
|
||||
-2.25,
|
||||
-0.8359375,
|
||||
-0.828125,
|
||||
1.15625,
|
||||
2.328125,
|
||||
-1.0078125,
|
||||
-3.0,
|
||||
4.09375,
|
||||
0.8359375,
|
||||
0.1015625,
|
||||
2.015625,
|
||||
-1.0859375
|
||||
],
|
||||
"index": 0,
|
||||
"object": "embedding",
|
||||
"raw_output": null
|
||||
}
|
||||
],
|
||||
"model": "accounts/fireworks/models/qwen3-embedding-8b",
|
||||
"object": "list",
|
||||
"usage": {
|
||||
"prompt_tokens": 5,
|
||||
"total_tokens": 5,
|
||||
"completion_tokens": 0
|
||||
},
|
||||
"perf_metrics": null
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
39
tests/integration/recordings/responses/8cdb7e65fcfe.json
Normal file
39
tests/integration/recordings/responses/8cdb7e65fcfe.json
Normal file
|
@ -0,0 +1,39 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "accounts/fireworks/models/qwen3-embedding-8b",
|
||||
"input": "Test dimensions parameter",
|
||||
"encoding_format": "base64",
|
||||
"dimensions": 16
|
||||
},
|
||||
"endpoint": "/v1/embeddings",
|
||||
"model": "accounts/fireworks/models/qwen3-embedding-8b"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
|
||||
"__data__": {
|
||||
"data": [
|
||||
{
|
||||
"embedding": "AABuvwAApkAAAAnAAAAwPgAAEMAAAFa/AABUvwAAlD8AABVAAACBvwAAQMAAAINAAABWPwAA0D0AAAFAAACLvw==",
|
||||
"index": 0,
|
||||
"object": "embedding",
|
||||
"raw_output": null
|
||||
}
|
||||
],
|
||||
"model": "accounts/fireworks/models/qwen3-embedding-8b",
|
||||
"object": "list",
|
||||
"usage": {
|
||||
"prompt_tokens": 5,
|
||||
"total_tokens": 5,
|
||||
"completion_tokens": 0
|
||||
},
|
||||
"perf_metrics": null
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
56
tests/integration/recordings/responses/901b5e7db4aa.json
Normal file
56
tests/integration/recordings/responses/901b5e7db4aa.json
Normal file
|
@ -0,0 +1,56 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello, world!"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "1d64ff81-b7c4-40c6-9509-cca71759da3e",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1758920401,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 26,
|
||||
"prompt_tokens": 14,
|
||||
"total_tokens": 40,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
347
tests/integration/recordings/responses/949d3ad16367.json
Normal file
347
tests/integration/recordings/responses/949d3ad16367.json
Normal file
|
@ -0,0 +1,347 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://localhost:11434/api/generate",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"raw": true,
|
||||
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"greet_everyone\",\n \"description\": \"\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"url\"],\n \"properties\": {\n \"url\": {\n \"type\": \"string\",\n \"description\": \"\"\n }\n }\n }\n },\n {\n \"name\": \"get_boiling_point\",\n \"description\": \"\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n \",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"liquid_name\"],\n \"properties\": {\n \"liquid_name\": {\n \"type\": \"string\",\n \"description\": \"\"\n },\n \"celsius\": {\n \"type\": \"boolean\",\n \"description\": \"\",\n \"default\": \"True\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. 
Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[greet_everyone(url=\"world\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHow can I assist you further?<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the boiling point of polyjuice? Use tools to answer.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
|
||||
"options": {
|
||||
"temperature": 0.0
|
||||
},
|
||||
"stream": true
|
||||
},
|
||||
"endpoint": "/api/generate",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.177453Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "[",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.220271Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "get",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.261232Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "_bo",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.302818Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "iling",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.344343Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "_point",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.386025Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "(",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.42778Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "liquid",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.469673Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "_name",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.512543Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "='",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.554479Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "poly",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.597092Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "ju",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.639581Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "ice",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.683223Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "',",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.72556Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": " c",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.768012Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "elsius",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.8098Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "=True",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.851578Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": ")]",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:57.893693Z",
|
||||
"done": true,
|
||||
"done_reason": "stop",
|
||||
"total_duration": 885274541,
|
||||
"load_duration": 99578333,
|
||||
"prompt_eval_count": 514,
|
||||
"prompt_eval_duration": 67915875,
|
||||
"eval_count": 18,
|
||||
"eval_duration": 717086791,
|
||||
"response": "",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|
74
tests/integration/recordings/responses/969a9a757e0c.json
Normal file
74
tests/integration/recordings/responses/969a9a757e0c.json
Normal file
|
@ -0,0 +1,74 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
|
||||
}
|
||||
],
|
||||
"stream": false,
|
||||
"tools": [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the weather in a given city",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"city": {
|
||||
"type": "string",
|
||||
"description": "The city to get the weather for"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "0fe94e7d-f25b-4843-ba0a-e402e0764830",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "I can\u2019t help with that. If you're looking for current weather information, I recommend checking a weather website or app, such as AccuWeather or Weather.com. Is there anything else I can help you with?",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1758920402,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 45,
|
||||
"prompt_tokens": 27,
|
||||
"total_tokens": 72,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
4135
tests/integration/recordings/responses/9c1a4c5336a7.json
Normal file
4135
tests/integration/recordings/responses/9c1a4c5336a7.json
Normal file
File diff suppressed because it is too large
Load diff
55
tests/integration/recordings/responses/a369881bb3a2.json
Normal file
55
tests/integration/recordings/responses/a369881bb3a2.json
Normal file
|
@ -0,0 +1,55 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Test trace 0"
|
||||
}
|
||||
]
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-272",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "I'm happy to help you with a test. Since we are in the middle of a text-based conversation, I'll do my best to simulate a simple test tracing process.\n\n**Trace Test Results**\n\nTo perform this test, please follow these steps:\n\n1. Type \"test\" on command mode.\n2. Press Enter.\n\nNow, let's start tracing...\n\nTest Tracing Results:\nTest Case: General Functions\nTest Case Result: PASS\n\nSystem Response:\n\n```\n# System Boot Time: 2023-10-13T14:30:00\n# CPU Temperature: 35\u00b0C\n# Disk Space Available: 80%\n```\n\nNext Steps?\n\nType 'done' to exit the test, or 'run' for more tests.",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1758978134,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 152,
|
||||
"prompt_tokens": 29,
|
||||
"total_tokens": 181,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
44
tests/integration/recordings/responses/a46b77ffd494.json
Normal file
44
tests/integration/recordings/responses/a46b77ffd494.json
Normal file
|
@ -0,0 +1,44 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"prompt": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963",
|
||||
"stop": "1963",
|
||||
"stream": false,
|
||||
"extra_body": {}
|
||||
},
|
||||
"endpoint": "/v1/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "cmpl-183",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "Michael Jordan was born in the year of "
|
||||
}
|
||||
],
|
||||
"created": 1758978053,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 11,
|
||||
"prompt_tokens": 48,
|
||||
"total_tokens": 59,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
12345
tests/integration/recordings/responses/aa745b14fe67.json
Normal file
12345
tests/integration/recordings/responses/aa745b14fe67.json
Normal file
File diff suppressed because it is too large
Load diff
112
tests/integration/recordings/responses/c3dbccc5de74.json
Normal file
112
tests/integration/recordings/responses/c3dbccc5de74.json
Normal file
|
@ -0,0 +1,112 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Pretend you are a weather assistant."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather like in San Francisco, CA?"
|
||||
}
|
||||
],
|
||||
"stream": true,
|
||||
"tool_choice": "auto",
|
||||
"tools": [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state (both required), e.g. San Francisco, CA."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"location"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-634",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": [
|
||||
{
|
||||
"index": 0,
|
||||
"id": "call_wubm4yax",
|
||||
"function": {
|
||||
"arguments": "{\"location\":\"San Francisco, CA\"}",
|
||||
"name": "get_weather"
|
||||
},
|
||||
"type": "function"
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758975115,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-634",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "tool_calls",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758975115,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|
47
tests/integration/recordings/responses/c8e196049fe4.json
Normal file
47
tests/integration/recordings/responses/c8e196049fe4.json
Normal file
|
@ -0,0 +1,47 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"prompt": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963",
|
||||
"stop": [
|
||||
"blathering",
|
||||
"1963"
|
||||
],
|
||||
"stream": false,
|
||||
"extra_body": {}
|
||||
},
|
||||
"endpoint": "/v1/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "cmpl-381",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "Michael Jordan was born in the year of "
|
||||
}
|
||||
],
|
||||
"created": 1758978056,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 11,
|
||||
"prompt_tokens": 48,
|
||||
"total_tokens": 59,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
4135
tests/integration/recordings/responses/ca332c91adee.json
Normal file
4135
tests/integration/recordings/responses/ca332c91adee.json
Normal file
File diff suppressed because it is too large
Load diff
55
tests/integration/recordings/responses/cb1099daed49.json
Normal file
55
tests/integration/recordings/responses/cb1099daed49.json
Normal file
|
@ -0,0 +1,55 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Test trace 1"
|
||||
}
|
||||
]
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-122",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "It appears you're trying to initiate a conversation or test the functionality of this AI system. I'm happy to chat with you!\n\nWould you like to:\nA) Ask me a question on a specific topic\nB) Engage in a conversational dialogue on a topic of your choice\nC) Play a text-based game\nD) Test my language understanding capabilities\n\nPlease respond with the letter of your preferred activity.",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1758978142,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 85,
|
||||
"prompt_tokens": 29,
|
||||
"total_tokens": 114,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
|
@ -13,22 +13,23 @@
|
|||
"__data__": {
|
||||
"models": [
|
||||
{
|
||||
"model": "llama3.2-vision:11b",
|
||||
"name": "llama3.2-vision:11b",
|
||||
"digest": "6f2f9757ae97e8a3f8ea33d6adb2b11d93d9a35bef277cd2c0b1b5af8e8d0b1e",
|
||||
"expires_at": "2025-09-03T11:51:35.966409-07:00",
|
||||
"size": 12401209008,
|
||||
"size_vram": 12401209008,
|
||||
"model": "llama3.2:3b",
|
||||
"name": "llama3.2:3b",
|
||||
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
|
||||
"expires_at": "2025-09-27T11:54:56.718552-07:00",
|
||||
"size": 3367856128,
|
||||
"size_vram": 3367856128,
|
||||
"details": {
|
||||
"parent_model": "",
|
||||
"format": "gguf",
|
||||
"family": "mllama",
|
||||
"family": "llama",
|
||||
"families": [
|
||||
"mllama"
|
||||
"llama"
|
||||
],
|
||||
"parameter_size": "10.7B",
|
||||
"parameter_size": "3.2B",
|
||||
"quantization_level": "Q4_K_M"
|
||||
}
|
||||
},
|
||||
"context_length": 4096
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
4135
tests/integration/recordings/responses/d10fc0f9ac66.json
Normal file
4135
tests/integration/recordings/responses/d10fc0f9ac66.json
Normal file
File diff suppressed because it is too large
Load diff
43
tests/integration/recordings/responses/d45ca9107508.json
Normal file
43
tests/integration/recordings/responses/d45ca9107508.json
Normal file
|
@ -0,0 +1,43 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.fireworks.ai/inference/v1/v1/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
|
||||
"stream": false,
|
||||
"extra_body": {}
|
||||
},
|
||||
"endpoint": "/v1/completions",
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "1bbb8db5-63e5-40cd-8ffe-59e0e88bf8f0",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "4. At the beginning of the year, a woman has $5,000"
|
||||
}
|
||||
],
|
||||
"created": 1758920353,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 16,
|
||||
"prompt_tokens": 25,
|
||||
"total_tokens": 41,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
67
tests/integration/recordings/responses/d927b47032de.json
Normal file
67
tests/integration/recordings/responses/d927b47032de.json
Normal file
File diff suppressed because one or more lines are too long
1077
tests/integration/recordings/responses/e22f98c05933.json
Normal file
1077
tests/integration/recordings/responses/e22f98c05933.json
Normal file
File diff suppressed because it is too large
Load diff
56
tests/integration/recordings/responses/e4daa5642f6e.json
Normal file
56
tests/integration/recordings/responses/e4daa5642f6e.json
Normal file
|
@ -0,0 +1,56 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Which planet has rings around it with a name starting with letter S?"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "26632ea9-3481-419d-bc0d-83c177257bc4",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "There are two planets in our solar system with ring systems that have names starting with the letter S:\n\n1. **Saturn** - Its ring system is one of the most iconic and well-known in our solar system. The rings are made up of ice and rock particles that range in size from tiny dust grains to massive boulders.\n2. **Saturn's moon** - The ring system of **Saturn's moon, Rhea**, is sometimes referred to as a \"ring system\" even though it's much smaller and less prominent than Saturn's. However, it's worth noting that Rhea's ring system is not as well-known as Saturn's.\n\nIf you're looking for a planet with a ring system that starts with the letter S and is not a moon, then the answer is Saturn!",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1758920397,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 164,
|
||||
"prompt_tokens": 24,
|
||||
"total_tokens": 188,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
185
tests/integration/recordings/responses/e61266e87842.json
Normal file
185
tests/integration/recordings/responses/e61266e87842.json
Normal file
|
@ -0,0 +1,185 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://localhost:11434/api/generate",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"raw": true,
|
||||
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"greet_everyone\",\n \"description\": \"\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"url\"],\n \"properties\": {\n \"url\": {\n \"type\": \"string\",\n \"description\": \"\"\n }\n }\n }\n },\n {\n \"name\": \"get_boiling_point\",\n \"description\": \"\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n \",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"liquid_name\"],\n \"properties\": {\n \"liquid_name\": {\n \"type\": \"string\",\n \"description\": \"\"\n },\n \"celsius\": {\n \"type\": \"boolean\",\n \"description\": \"\",\n \"default\": \"True\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. 
Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
|
||||
"options": {
|
||||
"temperature": 0.0
|
||||
},
|
||||
"stream": true
|
||||
},
|
||||
"endpoint": "/api/generate",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.034121Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "[g",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.07569Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "reet",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.116927Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "_every",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.159755Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "one",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.201675Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "(url",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.243056Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "=\"",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.284651Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "world",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.326276Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "\")]",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-09-27T18:05:56.367959Z",
|
||||
"done": true,
|
||||
"done_reason": "stop",
|
||||
"total_duration": 5381441291,
|
||||
"load_duration": 4112439791,
|
||||
"prompt_eval_count": 459,
|
||||
"prompt_eval_duration": 932587833,
|
||||
"eval_count": 9,
|
||||
"eval_duration": 334328250,
|
||||
"response": "",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|
706
tests/integration/recordings/responses/e99f14805360.json
Normal file
706
tests/integration/recordings/responses/e99f14805360.json
Normal file
|
@ -0,0 +1,706 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello, world!"
|
||||
}
|
||||
],
|
||||
"stream": true
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Hello!",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " It",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "'s",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " nice",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " to",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " meet",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " you",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": ".",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " Is",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " there",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " something",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " I",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " can",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " help",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " you",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " with",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": ",",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " or",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " would",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " you",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " like",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " to",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " chat",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "?",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "d583f66e-de11-4210-8153-54be000a2783",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758920391,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 26,
|
||||
"prompt_tokens": 14,
|
||||
"total_tokens": 40,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|
996
tests/integration/recordings/responses/f3cbd3f07e60.json
Normal file
996
tests/integration/recordings/responses/f3cbd3f07e60.json
Normal file
|
@ -0,0 +1,996 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.fireworks.ai/inference/v1/v1/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
|
||||
"max_tokens": 50,
|
||||
"stream": true,
|
||||
"extra_body": {}
|
||||
},
|
||||
"endpoint": "/v1/completions",
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " a"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " type"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " of"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " __________________"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "_____"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": ".\n\n"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "##"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " Step"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " "
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "1"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": ":"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " Identify"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " the"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " type"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " of"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " flower"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " mentioned"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " in"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " the"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " sentence"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": ".\n"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "The"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " sentence"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " mentions"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " \""
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "vio"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "lets"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": ".\"\n\n"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "##"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " Step"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " "
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "2"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": ":"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " Determine"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " the"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " type"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " of"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " flower"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " v"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "io"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "lets"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " are"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": ".\n"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "V"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "io"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": "lets"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " are"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " a"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " type"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " of"
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": ""
|
||||
}
|
||||
],
|
||||
"created": 1758920354,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 50,
|
||||
"prompt_tokens": 25,
|
||||
"total_tokens": 75,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|
4135
tests/integration/recordings/responses/f6469c4656dd.json
Normal file
4135
tests/integration/recordings/responses/f6469c4656dd.json
Normal file
File diff suppressed because it is too large
Load diff
56
tests/integration/recordings/responses/f701ad342bd8.json
Normal file
56
tests/integration/recordings/responses/f701ad342bd8.json
Normal file
|
@ -0,0 +1,56 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Which planet do humans live on?"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "0fd60cd7-dc72-45b7-808c-4da91de80093",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "Humans live on a planet called Earth.",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1758920388,
|
||||
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 9,
|
||||
"prompt_tokens": 17,
|
||||
"total_tokens": 26,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
|
@ -0,0 +1,527 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.fireworks.ai/inference/v1/v1/models",
|
||||
"headers": {},
|
||||
"body": {},
|
||||
"endpoint": "/v1/models",
|
||||
"model": ""
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/flux-1-dev-fp8",
|
||||
"created": 1729532889,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "FLUMINA_BASE_MODEL",
|
||||
"supports_chat": false,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/tvergho-87e44d/models/debatecards-70b-ft-3epoch-dpo-v2",
|
||||
"created": 1743381121,
|
||||
"object": "model",
|
||||
"owned_by": "tvergho-87e44d",
|
||||
"kind": "HF_PEFT_ADDON",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/flux-kontext-max",
|
||||
"created": 1750714611,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "FLUMINA_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": true,
|
||||
"supports_tools": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/flux-kontext-pro",
|
||||
"created": 1750488264,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "FLUMINA_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": true,
|
||||
"supports_tools": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b",
|
||||
"created": 1748467427,
|
||||
"object": "model",
|
||||
"owned_by": "sentientfoundation-serverless",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": false,
|
||||
"context_length": 131072
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/deepseek-v3",
|
||||
"created": 1735576668,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 131072
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new",
|
||||
"created": 1739563474,
|
||||
"object": "model",
|
||||
"owned_by": "sentientfoundation",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": false,
|
||||
"context_length": 131072
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/gpt-oss-120b",
|
||||
"created": 1754345600,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 131072
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
|
||||
"created": 1753211090,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 262144
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/qwen3-30b-a3b-thinking-2507",
|
||||
"created": 1753916446,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
|
||||
"created": 1753124424,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 262144
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/qwen3-235b-a22b-thinking-2507",
|
||||
"created": 1753455434,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": false,
|
||||
"context_length": 262144
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/qwen3-embedding-8b",
|
||||
"created": 1755707090,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": false,
|
||||
"context_length": 40960
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/deepseek-v3-0324",
|
||||
"created": 1742827220,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 163840
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/deepseek-v3p1-terminus",
|
||||
"created": 1758586241,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 163840
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/kimi-k2-instruct",
|
||||
"created": 1752259096,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 131072
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/gpt-oss-20b",
|
||||
"created": 1754345466,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": false,
|
||||
"context_length": 131072
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/llama4-maverick-instruct-basic",
|
||||
"created": 1743878495,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": true,
|
||||
"supports_tools": true,
|
||||
"context_length": 1048576
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct",
|
||||
"created": 1754063588,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": false,
|
||||
"context_length": 262144
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/llama-v3p3-70b-instruct",
|
||||
"created": 1733442103,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": false,
|
||||
"context_length": 131072
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct",
|
||||
"created": 1743392739,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": true,
|
||||
"supports_tools": false,
|
||||
"context_length": 128000
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/qwen3-235b-a22b",
|
||||
"created": 1745885249,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 131072
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/glm-4p5-air",
|
||||
"created": 1754089426,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 131072
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/deepseek-r1",
|
||||
"created": 1737397673,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": false,
|
||||
"context_length": 163840
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"created": 1721692808,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": false,
|
||||
"context_length": 131072
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/deepseek-r1-basic",
|
||||
"created": 1742306746,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": false,
|
||||
"context_length": 163840
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/deepseek-v3p1",
|
||||
"created": 1755758988,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 163840
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/flux-1-schnell-fp8",
|
||||
"created": 1729535376,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "FLUMINA_BASE_MODEL",
|
||||
"supports_chat": false,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/glm-4p5",
|
||||
"created": 1753809636,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 131072
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/kimi-k2-instruct-0905",
|
||||
"created": 1757018994,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 262144
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/llama-v3p1-405b-instruct",
|
||||
"created": 1721428386,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 131072
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/llama4-scout-instruct-basic",
|
||||
"created": 1743878279,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": true,
|
||||
"supports_tools": true,
|
||||
"context_length": 1048576
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/qwen3-30b-a3b",
|
||||
"created": 1745878133,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 131072
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/llama-v3p1-70b-instruct",
|
||||
"created": 1721287357,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 131072
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/deepseek-r1-0528",
|
||||
"created": 1748456377,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 163840
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/mixtral-8x22b-instruct",
|
||||
"created": 1713375508,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": true,
|
||||
"context_length": 65536
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507",
|
||||
"created": 1753808388,
|
||||
"object": "model",
|
||||
"owned_by": "fireworks",
|
||||
"kind": "HF_BASE_MODEL",
|
||||
"supports_chat": true,
|
||||
"supports_image_input": false,
|
||||
"supports_tools": false,
|
||||
"context_length": 262144
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
|
@ -0,0 +1,834 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.openai.com/v1/v1/models",
|
||||
"headers": {},
|
||||
"body": {},
|
||||
"endpoint": "/v1/models",
|
||||
"model": ""
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4-0613",
|
||||
"created": 1686588896,
|
||||
"object": "model",
|
||||
"owned_by": "openai"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4",
|
||||
"created": 1687882411,
|
||||
"object": "model",
|
||||
"owned_by": "openai"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-3.5-turbo",
|
||||
"created": 1677610602,
|
||||
"object": "model",
|
||||
"owned_by": "openai"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-5-codex",
|
||||
"created": 1757527818,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-audio-2025-08-28",
|
||||
"created": 1756256146,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-realtime",
|
||||
"created": 1756271701,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-realtime-2025-08-28",
|
||||
"created": 1756271773,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-audio",
|
||||
"created": 1756339249,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "davinci-002",
|
||||
"created": 1692634301,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "babbage-002",
|
||||
"created": 1692634615,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-3.5-turbo-instruct",
|
||||
"created": 1692901427,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-3.5-turbo-instruct-0914",
|
||||
"created": 1694122472,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "dall-e-3",
|
||||
"created": 1698785189,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "dall-e-2",
|
||||
"created": 1698798177,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4-1106-preview",
|
||||
"created": 1698957206,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-3.5-turbo-1106",
|
||||
"created": 1698959748,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "tts-1-hd",
|
||||
"created": 1699046015,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "tts-1-1106",
|
||||
"created": 1699053241,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "tts-1-hd-1106",
|
||||
"created": 1699053533,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "text-embedding-3-small",
|
||||
"created": 1705948997,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "text-embedding-3-large",
|
||||
"created": 1705953180,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4-0125-preview",
|
||||
"created": 1706037612,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4-turbo-preview",
|
||||
"created": 1706037777,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-3.5-turbo-0125",
|
||||
"created": 1706048358,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4-turbo",
|
||||
"created": 1712361441,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4-turbo-2024-04-09",
|
||||
"created": 1712601677,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o",
|
||||
"created": 1715367049,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-2024-05-13",
|
||||
"created": 1715368132,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-mini-2024-07-18",
|
||||
"created": 1721172717,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-mini",
|
||||
"created": 1721172741,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-2024-08-06",
|
||||
"created": 1722814719,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "chatgpt-4o-latest",
|
||||
"created": 1723515131,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o1-mini-2024-09-12",
|
||||
"created": 1725648979,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o1-mini",
|
||||
"created": 1725649008,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-realtime-preview-2024-10-01",
|
||||
"created": 1727131766,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-audio-preview-2024-10-01",
|
||||
"created": 1727389042,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-audio-preview",
|
||||
"created": 1727460443,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-realtime-preview",
|
||||
"created": 1727659998,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "omni-moderation-latest",
|
||||
"created": 1731689265,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "omni-moderation-2024-09-26",
|
||||
"created": 1732734466,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-realtime-preview-2024-12-17",
|
||||
"created": 1733945430,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-audio-preview-2024-12-17",
|
||||
"created": 1734034239,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-mini-realtime-preview-2024-12-17",
|
||||
"created": 1734112601,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-mini-audio-preview-2024-12-17",
|
||||
"created": 1734115920,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o1-2024-12-17",
|
||||
"created": 1734326976,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o1",
|
||||
"created": 1734375816,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-mini-realtime-preview",
|
||||
"created": 1734387380,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-mini-audio-preview",
|
||||
"created": 1734387424,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o3-mini",
|
||||
"created": 1737146383,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o3-mini-2025-01-31",
|
||||
"created": 1738010200,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-2024-11-20",
|
||||
"created": 1739331543,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-search-preview-2025-03-11",
|
||||
"created": 1741388170,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-search-preview",
|
||||
"created": 1741388720,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-mini-search-preview-2025-03-11",
|
||||
"created": 1741390858,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-mini-search-preview",
|
||||
"created": 1741391161,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-transcribe",
|
||||
"created": 1742068463,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-mini-transcribe",
|
||||
"created": 1742068596,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o1-pro-2025-03-19",
|
||||
"created": 1742251504,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o1-pro",
|
||||
"created": 1742251791,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-mini-tts",
|
||||
"created": 1742403959,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o3-2025-04-16",
|
||||
"created": 1744133301,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o4-mini-2025-04-16",
|
||||
"created": 1744133506,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o3",
|
||||
"created": 1744225308,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o4-mini",
|
||||
"created": 1744225351,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4.1-2025-04-14",
|
||||
"created": 1744315746,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4.1",
|
||||
"created": 1744316542,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4.1-mini-2025-04-14",
|
||||
"created": 1744317547,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4.1-mini",
|
||||
"created": 1744318173,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4.1-nano-2025-04-14",
|
||||
"created": 1744321025,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4.1-nano",
|
||||
"created": 1744321707,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-image-1",
|
||||
"created": 1745517030,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "codex-mini-latest",
|
||||
"created": 1746673257,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o3-pro",
|
||||
"created": 1748475349,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-realtime-preview-2025-06-03",
|
||||
"created": 1748907838,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-4o-audio-preview-2025-06-03",
|
||||
"created": 1748908498,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o3-pro-2025-06-10",
|
||||
"created": 1749166761,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o4-mini-deep-research",
|
||||
"created": 1749685485,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o3-deep-research",
|
||||
"created": 1749840121,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o3-deep-research-2025-06-26",
|
||||
"created": 1750865219,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "o4-mini-deep-research-2025-06-26",
|
||||
"created": 1750866121,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-5-chat-latest",
|
||||
"created": 1754073306,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-5-2025-08-07",
|
||||
"created": 1754075360,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-5",
|
||||
"created": 1754425777,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-5-mini-2025-08-07",
|
||||
"created": 1754425867,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-5-mini",
|
||||
"created": 1754425928,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-5-nano-2025-08-07",
|
||||
"created": 1754426303,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-5-nano",
|
||||
"created": 1754426384,
|
||||
"object": "model",
|
||||
"owned_by": "system"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-3.5-turbo-16k",
|
||||
"created": 1683758102,
|
||||
"object": "model",
|
||||
"owned_by": "openai-internal"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "tts-1",
|
||||
"created": 1681940951,
|
||||
"object": "model",
|
||||
"owned_by": "openai-internal"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "whisper-1",
|
||||
"created": 1677532384,
|
||||
"object": "model",
|
||||
"owned_by": "openai-internal"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "text-embedding-ada-002",
|
||||
"created": 1671217299,
|
||||
"object": "model",
|
||||
"owned_by": "openai-internal"
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
|
@ -0,0 +1,96 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/models",
|
||||
"headers": {},
|
||||
"body": {},
|
||||
"endpoint": "/v1/models",
|
||||
"model": ""
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "nomic-embed-text:latest",
|
||||
"created": 1756922046,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "all-minilm:l6-v2",
|
||||
"created": 1756919946,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "llama3.2-vision:11b",
|
||||
"created": 1753926302,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "llama3.2-vision:latest",
|
||||
"created": 1753845527,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "llama-guard3:1b",
|
||||
"created": 1753479584,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "llama3.2:1b",
|
||||
"created": 1752814944,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "all-minilm:latest",
|
||||
"created": 1748994610,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "llama3.2:3b",
|
||||
"created": 1746123323,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "llama3.2:3b-instruct-fp16",
|
||||
"created": 1746052428,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
|
@ -127,9 +127,8 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
|
|||
name="fireworks",
|
||||
description="Fireworks provider with a text model",
|
||||
defaults={
|
||||
"text_model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"vision_model": "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
|
||||
"embedding_model": "nomic-ai/nomic-embed-text-v1.5",
|
||||
"text_model": "fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
"embedding_model": "fireworks/accounts/fireworks/models/qwen3-embedding-8b",
|
||||
},
|
||||
),
|
||||
}
|
||||
|
|
|
@ -32,8 +32,8 @@ def setup_telemetry_data(llama_stack_client, text_model_id):
|
|||
)
|
||||
|
||||
for i in range(2):
|
||||
llama_stack_client.inference.chat_completion(
|
||||
model_id=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
|
||||
llama_stack_client.chat.completions.create(
|
||||
model=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
|
||||
)
|
||||
|
||||
start_time = time.time()
|
||||
|
|
|
@ -83,12 +83,19 @@
|
|||
],
|
||||
"tools": [
|
||||
{
|
||||
"tool_name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"location": {
|
||||
"param_type": "string",
|
||||
"description": "The city and state (both required), e.g. San Francisco, CA."
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state (both required), e.g. San Francisco, CA."
|
||||
}
|
||||
},
|
||||
"required": ["location"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -116,12 +123,19 @@
|
|||
],
|
||||
"tools": [
|
||||
{
|
||||
"tool_name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"location": {
|
||||
"param_type": "string",
|
||||
"description": "The city and state (both required), e.g. San Francisco, CA."
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state (both required), e.g. San Francisco, CA."
|
||||
}
|
||||
},
|
||||
"required": ["location"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -162,12 +176,19 @@
|
|||
],
|
||||
"tools": [
|
||||
{
|
||||
"tool_name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"location": {
|
||||
"param_type": "string",
|
||||
"description": "The city and state (both required), e.g. San Francisco, CA."
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state (both required), e.g. San Francisco, CA."
|
||||
}
|
||||
},
|
||||
"required": ["location"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -192,66 +213,6 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"array_parameter": {
|
||||
"data": {
|
||||
"messages": [
|
||||
[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Please add a new product with name 'Widget', price 19.99, in stock, and tags ['new', 'sale'] and give me the product id."
|
||||
}
|
||||
]
|
||||
],
|
||||
"tools": [
|
||||
{
|
||||
"tool_name": "addProduct",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"name": {
|
||||
"param_type": "string",
|
||||
"description": "Name of the product"
|
||||
},
|
||||
"price": {
|
||||
"param_type": "number",
|
||||
"description": "Price of the product"
|
||||
},
|
||||
"inStock": {
|
||||
"param_type": "boolean",
|
||||
"description": "Availability status of the product."
|
||||
},
|
||||
"tags": {
|
||||
"param_type": "list[str]",
|
||||
"description": "List of product tags"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"tool_responses": [
|
||||
{
|
||||
"response": "{'response': 'Successfully added product with id: 123'}"
|
||||
}
|
||||
],
|
||||
"expected": [
|
||||
{
|
||||
"num_tool_calls": 1,
|
||||
"tool_name": "addProduct",
|
||||
"tool_arguments": {
|
||||
"name": "Widget",
|
||||
"price": 19.99,
|
||||
"inStock": true,
|
||||
"tags": [
|
||||
"new",
|
||||
"sale"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"num_tool_calls": 0,
|
||||
"answer": "123"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"sample_messages_tool_calling": {
|
||||
"data": {
|
||||
"messages": [
|
||||
|
@ -270,13 +231,19 @@
|
|||
],
|
||||
"tools": [
|
||||
{
|
||||
"tool_name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"location": {
|
||||
"param_type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
"required": true
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state (both required), e.g. San Francisco, CA."
|
||||
}
|
||||
},
|
||||
"required": ["location"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -343,18 +310,23 @@
|
|||
],
|
||||
"tools": [
|
||||
{
|
||||
"tool_name": "get_object_namespace_list",
|
||||
"description": "Get the list of objects in a namespace",
|
||||
"parameters": {
|
||||
"kind": {
|
||||
"param_type": "string",
|
||||
"description": "the type of object",
|
||||
"required": true
|
||||
},
|
||||
"namespace": {
|
||||
"param_type": "string",
|
||||
"description": "the name of the namespace",
|
||||
"required": true
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_object_namespace_list",
|
||||
"description": "Get the list of objects in a namespace",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"kind": {
|
||||
"type": "string",
|
||||
"description": "the type of object"
|
||||
},
|
||||
"namespace": {
|
||||
"type": "string",
|
||||
"description": "the name of the namespace"
|
||||
}
|
||||
},
|
||||
"required": ["kind", "namespace"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,6 +31,11 @@ def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server):
|
|||
uri = mcp_server["server_url"]
|
||||
|
||||
# registering should not raise an error anymore even if you don't specify the auth token
|
||||
try:
|
||||
llama_stack_client.toolgroups.unregister(toolgroup_id=test_toolgroup_id)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
llama_stack_client.toolgroups.register(
|
||||
toolgroup_id=test_toolgroup_id,
|
||||
provider_id="model-context-protocol",
|
||||
|
|
|
@ -107,14 +107,34 @@ async def test_get_raw_document_text_deprecated_text_yaml_with_text_content_item
|
|||
assert "text/yaml" in str(w[0].message)
|
||||
|
||||
|
||||
async def test_get_raw_document_text_supports_json_mime_type():
|
||||
"""Test that the function accepts application/json mime type."""
|
||||
json_content = '{"name": "test", "version": "1.0", "items": ["item1", "item2"]}'
|
||||
|
||||
document = Document(content=json_content, mime_type="application/json")
|
||||
|
||||
result = await get_raw_document_text(document)
|
||||
assert result == json_content
|
||||
|
||||
|
||||
async def test_get_raw_document_text_with_json_text_content_item():
|
||||
"""Test that the function handles JSON TextContentItem correctly."""
|
||||
json_content = '{"key": "value", "nested": {"array": [1, 2, 3]}}'
|
||||
|
||||
document = Document(content=TextContentItem(text=json_content), mime_type="application/json")
|
||||
|
||||
result = await get_raw_document_text(document)
|
||||
assert result == json_content
|
||||
|
||||
|
||||
async def test_get_raw_document_text_rejects_unsupported_mime_types():
|
||||
"""Test that the function rejects unsupported mime types."""
|
||||
document = Document(
|
||||
content="Some content",
|
||||
mime_type="application/json", # Not supported
|
||||
mime_type="application/pdf", # Not supported
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="Unexpected document mime type: application/json"):
|
||||
with pytest.raises(ValueError, match="Unexpected document mime type: application/pdf"):
|
||||
await get_raw_document_text(document)
|
||||
|
||||
|
||||
|
|
|
@ -16,9 +16,11 @@ from llama_stack.apis.agents import (
|
|||
)
|
||||
from llama_stack.apis.common.responses import PaginatedResponse
|
||||
from llama_stack.apis.inference import Inference
|
||||
from llama_stack.apis.resource import ResourceType
|
||||
from llama_stack.apis.safety import Safety
|
||||
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
||||
from llama_stack.apis.tools import ListToolsResponse, Tool, ToolGroups, ToolParameter, ToolRuntime
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.providers.inline.agents.meta_reference.agent_instance import ChatAgent
|
||||
from llama_stack.providers.inline.agents.meta_reference.agents import MetaReferenceAgentsImpl
|
||||
from llama_stack.providers.inline.agents.meta_reference.config import MetaReferenceAgentsImplConfig
|
||||
from llama_stack.providers.inline.agents.meta_reference.persistence import AgentInfo
|
||||
|
@ -75,11 +77,11 @@ def sample_agent_config():
|
|||
},
|
||||
input_shields=["string"],
|
||||
output_shields=["string"],
|
||||
toolgroups=["string"],
|
||||
toolgroups=["mcp::my_mcp_server"],
|
||||
client_tools=[
|
||||
{
|
||||
"name": "string",
|
||||
"description": "string",
|
||||
"name": "client_tool",
|
||||
"description": "Client Tool",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "string",
|
||||
|
@ -226,3 +228,83 @@ async def test_delete_agent(agents_impl, sample_agent_config):
|
|||
# Verify the agent was deleted
|
||||
with pytest.raises(ValueError):
|
||||
await agents_impl.get_agent(agent_id)
|
||||
|
||||
|
||||
async def test__initialize_tools(agents_impl, sample_agent_config):
|
||||
# Mock tool_groups_api.list_tools()
|
||||
agents_impl.tool_groups_api.list_tools.return_value = ListToolsResponse(
|
||||
data=[
|
||||
Tool(
|
||||
identifier="story_maker",
|
||||
provider_id="model-context-protocol",
|
||||
type=ResourceType.tool,
|
||||
toolgroup_id="mcp::my_mcp_server",
|
||||
description="Make a story",
|
||||
parameters=[
|
||||
ToolParameter(
|
||||
name="story_title",
|
||||
parameter_type="string",
|
||||
description="Title of the story",
|
||||
required=True,
|
||||
title="Story Title",
|
||||
),
|
||||
ToolParameter(
|
||||
name="input_words",
|
||||
parameter_type="array",
|
||||
description="Input words",
|
||||
required=False,
|
||||
items={"type": "string"},
|
||||
title="Input Words",
|
||||
default=[],
|
||||
),
|
||||
],
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
create_response = await agents_impl.create_agent(sample_agent_config)
|
||||
agent_id = create_response.agent_id
|
||||
|
||||
# Get an instance of ChatAgent
|
||||
chat_agent = await agents_impl._get_agent_impl(agent_id)
|
||||
assert chat_agent is not None
|
||||
assert isinstance(chat_agent, ChatAgent)
|
||||
|
||||
# Initialize tool definitions
|
||||
await chat_agent._initialize_tools()
|
||||
assert len(chat_agent.tool_defs) == 2
|
||||
|
||||
# Verify the first tool, which is a client tool
|
||||
first_tool = chat_agent.tool_defs[0]
|
||||
assert first_tool.tool_name == "client_tool"
|
||||
assert first_tool.description == "Client Tool"
|
||||
|
||||
# Verify the second tool, which is an MCP tool that has an array-type property
|
||||
second_tool = chat_agent.tool_defs[1]
|
||||
assert second_tool.tool_name == "story_maker"
|
||||
assert second_tool.description == "Make a story"
|
||||
|
||||
parameters = second_tool.parameters
|
||||
assert len(parameters) == 2
|
||||
|
||||
# Verify a string property
|
||||
story_title = parameters.get("story_title")
|
||||
assert story_title is not None
|
||||
assert story_title.param_type == "string"
|
||||
assert story_title.description == "Title of the story"
|
||||
assert story_title.required
|
||||
assert story_title.items is None
|
||||
assert story_title.title == "Story Title"
|
||||
assert story_title.default is None
|
||||
|
||||
# Verify an array property
|
||||
input_words = parameters.get("input_words")
|
||||
assert input_words is not None
|
||||
assert input_words.param_type == "array"
|
||||
assert input_words.description == "Input words"
|
||||
assert not input_words.required
|
||||
assert input_words.items is not None
|
||||
assert len(input_words.items) == 1
|
||||
assert input_words.items.get("type") == "string"
|
||||
assert input_words.title == "Input Words"
|
||||
assert input_words.default == []
|
||||
|
|
|
@ -2,6 +2,4 @@
|
|||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .batch_inference import *
|
||||
# the root directory of this source tree.
|
|
@ -0,0 +1,147 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""
|
||||
Unit tests for MCP tool parameter conversion in streaming responses.
|
||||
|
||||
This tests the fix for handling array-type parameters with 'items' field
|
||||
when converting MCP tool definitions to OpenAI format.
|
||||
"""
|
||||
|
||||
from llama_stack.apis.tools import ToolDef, ToolParameter
|
||||
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
|
||||
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
|
||||
|
||||
|
||||
def test_mcp_tool_conversion_with_array_items():
|
||||
"""
|
||||
Test that MCP tool parameters with array type and items field are properly converted.
|
||||
|
||||
This is a regression test for the bug where array parameters without 'items'
|
||||
caused OpenAI API validation errors like:
|
||||
"Invalid schema for function 'pods_exec': In context=('properties', 'command'),
|
||||
array schema missing items."
|
||||
"""
|
||||
# Create a tool parameter with array type and items specification
|
||||
# This mimics what kubernetes-mcp-server's pods_exec tool has
|
||||
tool_param = ToolParameter(
|
||||
name="command",
|
||||
parameter_type="array",
|
||||
description="Command to execute in the pod",
|
||||
required=True,
|
||||
items={"type": "string"}, # This is the crucial field
|
||||
)
|
||||
|
||||
# Convert to ToolDefinition format (as done in streaming.py)
|
||||
tool_def = ToolDefinition(
|
||||
tool_name="test_tool",
|
||||
description="Test tool with array parameter",
|
||||
parameters={
|
||||
"command": ToolParamDefinition(
|
||||
param_type=tool_param.parameter_type,
|
||||
description=tool_param.description,
|
||||
required=tool_param.required,
|
||||
default=tool_param.default,
|
||||
items=tool_param.items, # The fix: ensure items is passed through
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
# Convert to OpenAI format
|
||||
openai_tool = convert_tooldef_to_openai_tool(tool_def)
|
||||
|
||||
# Verify the conversion includes the items field
|
||||
assert openai_tool["type"] == "function"
|
||||
assert openai_tool["function"]["name"] == "test_tool"
|
||||
assert "parameters" in openai_tool["function"]
|
||||
|
||||
parameters = openai_tool["function"]["parameters"]
|
||||
assert "properties" in parameters
|
||||
assert "command" in parameters["properties"]
|
||||
|
||||
command_param = parameters["properties"]["command"]
|
||||
assert command_param["type"] == "array"
|
||||
assert "items" in command_param, "Array parameter must have 'items' field for OpenAI API"
|
||||
assert command_param["items"] == {"type": "string"}
|
||||
|
||||
|
||||
def test_mcp_tool_conversion_without_array():
|
||||
"""Test that non-array parameters work correctly without items field."""
|
||||
tool_param = ToolParameter(
|
||||
name="name",
|
||||
parameter_type="string",
|
||||
description="Name parameter",
|
||||
required=True,
|
||||
)
|
||||
|
||||
tool_def = ToolDefinition(
|
||||
tool_name="test_tool",
|
||||
description="Test tool with string parameter",
|
||||
parameters={
|
||||
"name": ToolParamDefinition(
|
||||
param_type=tool_param.parameter_type,
|
||||
description=tool_param.description,
|
||||
required=tool_param.required,
|
||||
items=tool_param.items, # Will be None for non-array types
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
openai_tool = convert_tooldef_to_openai_tool(tool_def)
|
||||
|
||||
# Verify basic structure
|
||||
assert openai_tool["type"] == "function"
|
||||
parameters = openai_tool["function"]["parameters"]
|
||||
assert "name" in parameters["properties"]
|
||||
|
||||
name_param = parameters["properties"]["name"]
|
||||
assert name_param["type"] == "string"
|
||||
# items should not be present for non-array types
|
||||
assert "items" not in name_param or name_param.get("items") is None
|
||||
|
||||
|
||||
def test_mcp_tool_conversion_complex_array_items():
|
||||
"""Test array parameter with complex items schema (object type)."""
|
||||
tool_param = ToolParameter(
|
||||
name="configs",
|
||||
parameter_type="array",
|
||||
description="Array of configuration objects",
|
||||
required=False,
|
||||
items={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"key": {"type": "string"},
|
||||
"value": {"type": "string"},
|
||||
},
|
||||
"required": ["key"],
|
||||
},
|
||||
)
|
||||
|
||||
tool_def = ToolDefinition(
|
||||
tool_name="test_tool",
|
||||
description="Test tool with complex array parameter",
|
||||
parameters={
|
||||
"configs": ToolParamDefinition(
|
||||
param_type=tool_param.parameter_type,
|
||||
description=tool_param.description,
|
||||
required=tool_param.required,
|
||||
items=tool_param.items,
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
openai_tool = convert_tooldef_to_openai_tool(tool_def)
|
||||
|
||||
# Verify complex items schema is preserved
|
||||
parameters = openai_tool["function"]["parameters"]
|
||||
configs_param = parameters["properties"]["configs"]
|
||||
|
||||
assert configs_param["type"] == "array"
|
||||
assert "items" in configs_param
|
||||
assert configs_param["items"]["type"] == "object"
|
||||
assert "properties" in configs_param["items"]
|
||||
assert "key" in configs_param["items"]["properties"]
|
||||
assert "value" in configs_param["items"]["properties"]
|
|
@ -4,11 +4,11 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from unittest.mock import MagicMock, PropertyMock, patch
|
||||
from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from llama_stack.apis.inference import Model
|
||||
from llama_stack.apis.inference import Model, OpenAIUserMessageParam
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
|
||||
|
@ -43,8 +43,17 @@ class OpenAIMixinWithEmbeddingsImpl(OpenAIMixin):
|
|||
|
||||
@pytest.fixture
|
||||
def mixin():
|
||||
"""Create a test instance of OpenAIMixin"""
|
||||
return OpenAIMixinImpl()
|
||||
"""Create a test instance of OpenAIMixin with mocked model_store"""
|
||||
mixin_instance = OpenAIMixinImpl()
|
||||
|
||||
# just enough to satisfy _get_provider_model_id calls
|
||||
mock_model_store = MagicMock()
|
||||
mock_model = MagicMock()
|
||||
mock_model.provider_resource_id = "test-provider-resource-id"
|
||||
mock_model_store.get_model = AsyncMock(return_value=mock_model)
|
||||
mixin_instance.model_store = mock_model_store
|
||||
|
||||
return mixin_instance
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
@ -205,6 +214,74 @@ class TestOpenAIMixinCacheBehavior:
|
|||
assert "final-mock-model-id" in mixin._model_cache
|
||||
|
||||
|
||||
class TestOpenAIMixinImagePreprocessing:
|
||||
"""Test cases for image preprocessing functionality"""
|
||||
|
||||
async def test_openai_chat_completion_with_image_preprocessing_enabled(self, mixin):
|
||||
"""Test that image URLs are converted to base64 when download_images is True"""
|
||||
mixin.download_images = True
|
||||
|
||||
message = OpenAIUserMessageParam(
|
||||
role="user",
|
||||
content=[
|
||||
{"type": "text", "text": "What's in this image?"},
|
||||
{"type": "image_url", "image_url": {"url": "http://example.com/image.jpg"}},
|
||||
],
|
||||
)
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_response = MagicMock()
|
||||
mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
|
||||
|
||||
with patch.object(type(mixin), "client", new_callable=PropertyMock, return_value=mock_client):
|
||||
with patch("llama_stack.providers.utils.inference.openai_mixin.localize_image_content") as mock_localize:
|
||||
mock_localize.return_value = (b"fake_image_data", "jpeg")
|
||||
|
||||
await mixin.openai_chat_completion(model="test-model", messages=[message])
|
||||
|
||||
mock_localize.assert_called_once_with("http://example.com/image.jpg")
|
||||
|
||||
mock_client.chat.completions.create.assert_called_once()
|
||||
call_args = mock_client.chat.completions.create.call_args
|
||||
processed_messages = call_args[1]["messages"]
|
||||
assert len(processed_messages) == 1
|
||||
content = processed_messages[0]["content"]
|
||||
assert len(content) == 2
|
||||
assert content[0]["type"] == "text"
|
||||
assert content[1]["type"] == "image_url"
|
||||
assert content[1]["image_url"]["url"] == "data:image/jpeg;base64,ZmFrZV9pbWFnZV9kYXRh"
|
||||
|
||||
async def test_openai_chat_completion_with_image_preprocessing_disabled(self, mixin):
|
||||
"""Test that image URLs are not modified when download_images is False"""
|
||||
mixin.download_images = False # explicitly set to False
|
||||
|
||||
message = OpenAIUserMessageParam(
|
||||
role="user",
|
||||
content=[
|
||||
{"type": "text", "text": "What's in this image?"},
|
||||
{"type": "image_url", "image_url": {"url": "http://example.com/image.jpg"}},
|
||||
],
|
||||
)
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_response = MagicMock()
|
||||
mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
|
||||
|
||||
with patch.object(type(mixin), "client", new_callable=PropertyMock, return_value=mock_client):
|
||||
with patch("llama_stack.providers.utils.inference.openai_mixin.localize_image_content") as mock_localize:
|
||||
await mixin.openai_chat_completion(model="test-model", messages=[message])
|
||||
|
||||
mock_localize.assert_not_called()
|
||||
|
||||
mock_client.chat.completions.create.assert_called_once()
|
||||
call_args = mock_client.chat.completions.create.call_args
|
||||
processed_messages = call_args[1]["messages"]
|
||||
assert len(processed_messages) == 1
|
||||
content = processed_messages[0]["content"]
|
||||
assert len(content) == 2
|
||||
assert content[1]["image_url"]["url"] == "http://example.com/image.jpg"
|
||||
|
||||
|
||||
class TestOpenAIMixinEmbeddingModelMetadata:
|
||||
"""Test cases for embedding_model_metadata attribute functionality"""
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue