diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 665f8bd7e..0eb252695 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -34,22 +34,20 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install uv
- uses: astral-sh/setup-uv@22695119d769bdb6f7032ad67b9bca0ef8c4a174 # v5.4.0
+ uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 # v5.4.1
with:
python-version: "3.10"
- - name: Install Ollama
+ - name: Install and start Ollama
run: |
+ # the ollama installer also starts the ollama service
curl -fsSL https://ollama.com/install.sh | sh
- name: Pull Ollama image
run: |
+ # TODO: cache the model. OLLAMA_MODELS defaults to ~ollama/.ollama/models.
ollama pull llama3.2:3b-instruct-fp16
- - name: Start Ollama in background
- run: |
- nohup ollama run llama3.2:3b-instruct-fp16 > ollama.log 2>&1 &
-
- name: Set Up Environment and Install Dependencies
run: |
uv sync --extra dev --extra test
@@ -61,21 +59,6 @@ jobs:
uv pip install -e .
llama stack build --template ollama --image-type venv
- - name: Wait for Ollama to start
- run: |
- echo "Waiting for Ollama..."
- for i in {1..30}; do
- if curl -s http://localhost:11434 | grep -q "Ollama is running"; then
- echo "Ollama is running!"
- exit 0
- fi
- sleep 1
- done
- echo "Ollama failed to start"
- ollama ps
- ollama.log
- exit 1
-
- name: Start Llama Stack server in background
if: matrix.client-type == 'http'
env:
@@ -99,6 +82,17 @@ jobs:
cat server.log
exit 1
+ - name: Verify Ollama status is OK
+ if: matrix.client-type == 'http'
+ run: |
+ echo "Verifying Ollama status..."
+ ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama|jq --raw-output .health.status)
+ echo "Ollama status: $ollama_status"
+ if [ "$ollama_status" != "OK" ]; then
+ echo "Ollama health check failed"
+ exit 1
+ fi
+
- name: Run Integration Tests
env:
INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
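
The new "Verify Ollama status is OK" step relies on the `health` field this PR adds to the provider records. As a rough illustration, the same probe the workflow performs with `curl` and `jq` looks like this in Python (a sketch only, assuming a Llama Stack server is already listening on 127.0.0.1:8321):

```python
import json
import urllib.request

# Fetch the provider record for the ollama inference provider.
with urllib.request.urlopen("http://127.0.0.1:8321/v1/providers/ollama") as resp:
    provider = json.load(resp)

# The provider record now carries a health object whose status is
# "OK", "Error", or "Not Implemented".
status = provider["health"]["status"]
if status != "OK":
    raise SystemExit(f"Ollama health check failed: {provider['health']}")
print("Ollama status:", status)
```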
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 847aaecd7..17a42dd26 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -31,3 +31,12 @@ jobs:
- name: Verify if there are any diff files after pre-commit
run: |
git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)
+
+ - name: Verify if there are any new files after pre-commit
+ run: |
+ unstaged_files=$(git ls-files --others --exclude-standard)
+ if [ -n "$unstaged_files" ]; then
+ echo "There are uncommitted new files, run pre-commit locally and commit again"
+ echo "$unstaged_files"
+ exit 1
+ fi
diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml
index 915344221..ee532a94a 100644
--- a/.github/workflows/providers-build.yml
+++ b/.github/workflows/providers-build.yml
@@ -56,7 +56,7 @@ jobs:
python-version: '3.10'
- name: Install uv
- uses: astral-sh/setup-uv@22695119d769bdb6f7032ad67b9bca0ef8c4a174 # v5.4.0
+ uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 # v5.4.1
with:
python-version: "3.10"
@@ -81,3 +81,29 @@ jobs:
run: |
source test/bin/activate
uv pip list
+
+ build-single-provider:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+
+ - name: Install uv
+ uses: astral-sh/setup-uv@v5
+ with:
+ python-version: "3.10"
+
+ - name: Install LlamaStack
+ run: |
+ uv venv
+ source .venv/bin/activate
+ uv pip install -e .
+
+ - name: Build a single provider
+ run: |
+ USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --image-type venv --image-name test --providers inference=remote::ollama
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index da7289afc..4b0c58b99 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -38,7 +38,7 @@ jobs:
with:
python-version: ${{ matrix.python }}
- - uses: astral-sh/setup-uv@22695119d769bdb6f7032ad67b9bca0ef8c4a174 # v5.4.0
+ - uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 # v5.4.1
with:
python-version: ${{ matrix.python }}
enable-cache: false
diff --git a/.github/workflows/update-readthedocs.yml b/.github/workflows/update-readthedocs.yml
index 74bf0d0b0..794a727be 100644
--- a/.github/workflows/update-readthedocs.yml
+++ b/.github/workflows/update-readthedocs.yml
@@ -41,7 +41,7 @@ jobs:
python-version: '3.11'
- name: Install the latest version of uv
- uses: astral-sh/setup-uv@22695119d769bdb6f7032ad67b9bca0ef8c4a174 # v5.4.0
+ uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 # v5.4.1
- name: Sync with uv
run: uv sync --extra docs
diff --git a/README.md b/README.md
index 617e5117b..8c201e43d 100644
--- a/README.md
+++ b/README.md
@@ -9,15 +9,16 @@
[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb)
-
### ✨🎉 Llama 4 Support 🎉✨
We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.
-You can now run Llama 4 models on Llama Stack.
+
+👋 Here is how to run Llama 4 models on Llama Stack:
+
+\
*Note you need 8xH100 GPU-host to run these models*
-
```bash
pip install -U llama_stack
@@ -67,6 +68,9 @@ print(f"Assistant> {response.completion_message.content}")
As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned!
+
+
+
### Overview
Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides
diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 542fb5be5..54d888441 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -3096,11 +3096,18 @@
"post": {
"responses": {
"200": {
- "description": "OK",
+ "description": "Response from an OpenAI-compatible chat completion request. **OR** Chunk from a streaming response to an OpenAI-compatible chat completion request.",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/OpenAIChatCompletion"
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/OpenAIChatCompletion"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIChatCompletionChunk"
+ }
+ ]
}
}
}
@@ -7889,7 +7896,13 @@
"type": "object",
"properties": {
"status": {
- "type": "string"
+ "type": "string",
+ "enum": [
+ "OK",
+ "Error",
+ "Not Implemented"
+ ],
+ "title": "HealthStatus"
}
},
"additionalProperties": false,
@@ -8084,6 +8097,31 @@
}
]
}
+ },
+ "health": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
}
},
"additionalProperties": false,
@@ -8091,7 +8129,8 @@
"api",
"provider_id",
"provider_type",
- "config"
+ "config",
+ "health"
],
"title": "ProviderInfo"
},
@@ -8825,7 +8864,17 @@
"description": "Must be \"assistant\" to identify this as the model's response"
},
"content": {
- "$ref": "#/components/schemas/InterleavedContent",
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
+ }
+ }
+ ],
"description": "The content of the model's response"
},
"name": {
@@ -8835,9 +8884,9 @@
"tool_calls": {
"type": "array",
"items": {
- "$ref": "#/components/schemas/ToolCall"
+ "$ref": "#/components/schemas/OpenAIChatCompletionToolCall"
},
- "description": "List of tool calls. Each tool call is a ToolCall object."
+ "description": "List of tool calls. Each tool call is an OpenAIChatCompletionToolCall object."
}
},
"additionalProperties": false,
@@ -8848,6 +8897,98 @@
"title": "OpenAIAssistantMessageParam",
"description": "A message containing the model's (assistant) response in an OpenAI-compatible chat completion request."
},
+ "OpenAIChatCompletionContentPartImageParam": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "image_url",
+ "default": "image_url"
+ },
+ "image_url": {
+ "$ref": "#/components/schemas/OpenAIImageURL"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "image_url"
+ ],
+ "title": "OpenAIChatCompletionContentPartImageParam"
+ },
+ "OpenAIChatCompletionContentPartParam": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
+ }
+ ],
+ "discriminator": {
+ "propertyName": "type",
+ "mapping": {
+ "text": "#/components/schemas/OpenAIChatCompletionContentPartTextParam",
+ "image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
+ }
+ }
+ },
+ "OpenAIChatCompletionContentPartTextParam": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "text",
+ "default": "text"
+ },
+ "text": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "text"
+ ],
+ "title": "OpenAIChatCompletionContentPartTextParam"
+ },
+ "OpenAIChatCompletionToolCall": {
+ "type": "object",
+ "properties": {
+ "index": {
+ "type": "integer"
+ },
+ "id": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "function",
+ "default": "function"
+ },
+ "function": {
+ "$ref": "#/components/schemas/OpenAIChatCompletionToolCallFunction"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ],
+ "title": "OpenAIChatCompletionToolCall"
+ },
+ "OpenAIChatCompletionToolCallFunction": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "arguments": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "title": "OpenAIChatCompletionToolCallFunction"
+ },
"OpenAIDeveloperMessageParam": {
"type": "object",
"properties": {
@@ -8858,7 +8999,17 @@
"description": "Must be \"developer\" to identify this as a developer message"
},
"content": {
- "$ref": "#/components/schemas/InterleavedContent",
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
+ }
+ }
+ ],
"description": "The content of the developer message"
},
"name": {
@@ -8874,6 +9025,66 @@
"title": "OpenAIDeveloperMessageParam",
"description": "A message from the developer in an OpenAI-compatible chat completion request."
},
+ "OpenAIImageURL": {
+ "type": "object",
+ "properties": {
+ "url": {
+ "type": "string"
+ },
+ "detail": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "url"
+ ],
+ "title": "OpenAIImageURL"
+ },
+ "OpenAIJSONSchema": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "description": {
+ "type": "string"
+ },
+ "strict": {
+ "type": "boolean"
+ },
+ "schema": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "name"
+ ],
+ "title": "OpenAIJSONSchema"
+ },
"OpenAIMessageParam": {
"oneOf": [
{
@@ -8903,6 +9114,76 @@
}
}
},
+ "OpenAIResponseFormatJSONObject": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "json_object",
+ "default": "json_object"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ],
+ "title": "OpenAIResponseFormatJSONObject"
+ },
+ "OpenAIResponseFormatJSONSchema": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "json_schema",
+ "default": "json_schema"
+ },
+ "json_schema": {
+ "$ref": "#/components/schemas/OpenAIJSONSchema"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "json_schema"
+ ],
+ "title": "OpenAIResponseFormatJSONSchema"
+ },
+ "OpenAIResponseFormatParam": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/OpenAIResponseFormatText"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseFormatJSONSchema"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseFormatJSONObject"
+ }
+ ],
+ "discriminator": {
+ "propertyName": "type",
+ "mapping": {
+ "text": "#/components/schemas/OpenAIResponseFormatText",
+ "json_schema": "#/components/schemas/OpenAIResponseFormatJSONSchema",
+ "json_object": "#/components/schemas/OpenAIResponseFormatJSONObject"
+ }
+ }
+ },
+ "OpenAIResponseFormatText": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "text",
+ "default": "text"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ],
+ "title": "OpenAIResponseFormatText"
+ },
"OpenAISystemMessageParam": {
"type": "object",
"properties": {
@@ -8913,7 +9194,17 @@
"description": "Must be \"system\" to identify this as a system message"
},
"content": {
- "$ref": "#/components/schemas/InterleavedContent",
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
+ }
+ }
+ ],
"description": "The content of the \"system prompt\". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages (for example, for formatting tool definitions)."
},
"name": {
@@ -8943,7 +9234,17 @@
"description": "Unique identifier for the tool call this response is for"
},
"content": {
- "$ref": "#/components/schemas/InterleavedContent",
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
+ }
+ }
+ ],
"description": "The response content from the tool"
}
},
@@ -8966,7 +9267,17 @@
"description": "Must be \"user\" to identify this as a user message"
},
"content": {
- "$ref": "#/components/schemas/InterleavedContent",
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
+ }
+ }
+ ],
"description": "The content of the message, which can include text and other media"
},
"name": {
@@ -9094,10 +9405,7 @@
"description": "(Optional) The penalty for repeated tokens"
},
"response_format": {
- "type": "object",
- "additionalProperties": {
- "type": "string"
- },
+ "$ref": "#/components/schemas/OpenAIResponseFormatParam",
"description": "(Optional) The response format to use"
},
"seed": {
@@ -9274,6 +9582,46 @@
"title": "OpenAIChatCompletion",
"description": "Response from an OpenAI-compatible chat completion request."
},
+ "OpenAIChatCompletionChunk": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The ID of the chat completion"
+ },
+ "choices": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIChunkChoice"
+ },
+ "description": "List of choices"
+ },
+ "object": {
+ "type": "string",
+ "const": "chat.completion.chunk",
+ "default": "chat.completion.chunk",
+ "description": "The object type, which will be \"chat.completion.chunk\""
+ },
+ "created": {
+ "type": "integer",
+ "description": "The Unix timestamp in seconds when the chat completion was created"
+ },
+ "model": {
+ "type": "string",
+ "description": "The model that was used to generate the chat completion"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "id",
+ "choices",
+ "object",
+ "created",
+ "model"
+ ],
+ "title": "OpenAIChatCompletionChunk",
+ "description": "Chunk from a streaming response to an OpenAI-compatible chat completion request."
+ },
"OpenAIChoice": {
"type": "object",
"properties": {
@@ -9286,10 +9634,12 @@
"description": "The reason the model stopped generating"
},
"index": {
- "type": "integer"
+ "type": "integer",
+ "description": "The index of the choice"
},
"logprobs": {
- "$ref": "#/components/schemas/OpenAIChoiceLogprobs"
+ "$ref": "#/components/schemas/OpenAIChoiceLogprobs",
+ "description": "(Optional) The log probabilities for the tokens in the message"
}
},
"additionalProperties": false,
@@ -9301,6 +9651,33 @@
"title": "OpenAIChoice",
"description": "A choice from an OpenAI-compatible chat completion response."
},
+ "OpenAIChoiceDelta": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "type": "string",
+ "description": "(Optional) The content of the delta"
+ },
+ "refusal": {
+ "type": "string",
+ "description": "(Optional) The refusal of the delta"
+ },
+ "role": {
+ "type": "string",
+ "description": "(Optional) The role of the delta"
+ },
+ "tool_calls": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIChatCompletionToolCall"
+ },
+ "description": "(Optional) The tool calls of the delta"
+ }
+ },
+ "additionalProperties": false,
+ "title": "OpenAIChoiceDelta",
+ "description": "A delta from an OpenAI-compatible chat completion streaming response."
+ },
"OpenAIChoiceLogprobs": {
"type": "object",
"properties": {
@@ -9308,19 +9685,50 @@
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAITokenLogProb"
- }
+ },
+ "description": "(Optional) The log probabilities for the tokens in the message"
},
"refusal": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAITokenLogProb"
- }
+ },
+ "description": "(Optional) The log probabilities for the tokens in the message"
}
},
"additionalProperties": false,
"title": "OpenAIChoiceLogprobs",
"description": "The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response."
},
+ "OpenAIChunkChoice": {
+ "type": "object",
+ "properties": {
+ "delta": {
+ "$ref": "#/components/schemas/OpenAIChoiceDelta",
+ "description": "The delta from the chunk"
+ },
+ "finish_reason": {
+ "type": "string",
+ "description": "The reason the model stopped generating"
+ },
+ "index": {
+ "type": "integer",
+ "description": "The index of the choice"
+ },
+ "logprobs": {
+ "$ref": "#/components/schemas/OpenAIChoiceLogprobs",
+ "description": "(Optional) The log probabilities for the tokens in the message"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "delta",
+ "finish_reason",
+ "index"
+ ],
+ "title": "OpenAIChunkChoice",
+ "description": "A chunk choice from an OpenAI-compatible chat completion streaming response."
+ },
"OpenAITokenLogProb": {
"type": "object",
"properties": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index fa7b130e2..cf657bff9 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -2135,11 +2135,15 @@ paths:
post:
responses:
'200':
- description: OK
+ description: >-
+ Response from an OpenAI-compatible chat completion request. **OR** Chunk
+ from a streaming response to an OpenAI-compatible chat completion request.
content:
application/json:
schema:
- $ref: '#/components/schemas/OpenAIChatCompletion'
+ oneOf:
+ - $ref: '#/components/schemas/OpenAIChatCompletion'
+ - $ref: '#/components/schemas/OpenAIChatCompletionChunk'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@@ -5463,6 +5467,11 @@ components:
properties:
status:
type: string
+ enum:
+ - OK
+ - Error
+ - Not Implemented
+ title: HealthStatus
additionalProperties: false
required:
- status
@@ -5574,12 +5583,23 @@ components:
- type: string
- type: array
- type: object
+ health:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
additionalProperties: false
required:
- api
- provider_id
- provider_type
- config
+ - health
title: ProviderInfo
InvokeToolRequest:
type: object
@@ -6057,7 +6077,11 @@ components:
description: >-
Must be "assistant" to identify this as the model's response
content:
- $ref: '#/components/schemas/InterleavedContent'
+ oneOf:
+ - type: string
+ - type: array
+ items:
+ $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: The content of the model's response
name:
type: string
@@ -6066,9 +6090,10 @@ components:
tool_calls:
type: array
items:
- $ref: '#/components/schemas/ToolCall'
+ $ref: '#/components/schemas/OpenAIChatCompletionToolCall'
description: >-
- List of tool calls. Each tool call is a ToolCall object.
+ List of tool calls. Each tool call is an OpenAIChatCompletionToolCall
+ object.
additionalProperties: false
required:
- role
@@ -6077,6 +6102,70 @@ components:
description: >-
A message containing the model's (assistant) response in an OpenAI-compatible
chat completion request.
+ "OpenAIChatCompletionContentPartImageParam":
+ type: object
+ properties:
+ type:
+ type: string
+ const: image_url
+ default: image_url
+ image_url:
+ $ref: '#/components/schemas/OpenAIImageURL'
+ additionalProperties: false
+ required:
+ - type
+ - image_url
+ title: >-
+ OpenAIChatCompletionContentPartImageParam
+ OpenAIChatCompletionContentPartParam:
+ oneOf:
+ - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
+ - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+ discriminator:
+ propertyName: type
+ mapping:
+ text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
+ image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+ OpenAIChatCompletionContentPartTextParam:
+ type: object
+ properties:
+ type:
+ type: string
+ const: text
+ default: text
+ text:
+ type: string
+ additionalProperties: false
+ required:
+ - type
+ - text
+ title: OpenAIChatCompletionContentPartTextParam
+ OpenAIChatCompletionToolCall:
+ type: object
+ properties:
+ index:
+ type: integer
+ id:
+ type: string
+ type:
+ type: string
+ const: function
+ default: function
+ function:
+ $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction'
+ additionalProperties: false
+ required:
+ - type
+ title: OpenAIChatCompletionToolCall
+ OpenAIChatCompletionToolCallFunction:
+ type: object
+ properties:
+ name:
+ type: string
+ arguments:
+ type: string
+ additionalProperties: false
+ title: OpenAIChatCompletionToolCallFunction
OpenAIDeveloperMessageParam:
type: object
properties:
@@ -6087,7 +6176,11 @@ components:
description: >-
Must be "developer" to identify this as a developer message
content:
- $ref: '#/components/schemas/InterleavedContent'
+ oneOf:
+ - type: string
+ - type: array
+ items:
+ $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: The content of the developer message
name:
type: string
@@ -6100,6 +6193,40 @@ components:
title: OpenAIDeveloperMessageParam
description: >-
A message from the developer in an OpenAI-compatible chat completion request.
+ OpenAIImageURL:
+ type: object
+ properties:
+ url:
+ type: string
+ detail:
+ type: string
+ additionalProperties: false
+ required:
+ - url
+ title: OpenAIImageURL
+ OpenAIJSONSchema:
+ type: object
+ properties:
+ name:
+ type: string
+ description:
+ type: string
+ strict:
+ type: boolean
+ schema:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ additionalProperties: false
+ required:
+ - name
+ title: OpenAIJSONSchema
OpenAIMessageParam:
oneOf:
- $ref: '#/components/schemas/OpenAIUserMessageParam'
@@ -6115,6 +6242,53 @@ components:
assistant: '#/components/schemas/OpenAIAssistantMessageParam'
tool: '#/components/schemas/OpenAIToolMessageParam'
developer: '#/components/schemas/OpenAIDeveloperMessageParam'
+ OpenAIResponseFormatJSONObject:
+ type: object
+ properties:
+ type:
+ type: string
+ const: json_object
+ default: json_object
+ additionalProperties: false
+ required:
+ - type
+ title: OpenAIResponseFormatJSONObject
+ OpenAIResponseFormatJSONSchema:
+ type: object
+ properties:
+ type:
+ type: string
+ const: json_schema
+ default: json_schema
+ json_schema:
+ $ref: '#/components/schemas/OpenAIJSONSchema'
+ additionalProperties: false
+ required:
+ - type
+ - json_schema
+ title: OpenAIResponseFormatJSONSchema
+ OpenAIResponseFormatParam:
+ oneOf:
+ - $ref: '#/components/schemas/OpenAIResponseFormatText'
+ - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema'
+ - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject'
+ discriminator:
+ propertyName: type
+ mapping:
+ text: '#/components/schemas/OpenAIResponseFormatText'
+ json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema'
+ json_object: '#/components/schemas/OpenAIResponseFormatJSONObject'
+ OpenAIResponseFormatText:
+ type: object
+ properties:
+ type:
+ type: string
+ const: text
+ default: text
+ additionalProperties: false
+ required:
+ - type
+ title: OpenAIResponseFormatText
OpenAISystemMessageParam:
type: object
properties:
@@ -6125,7 +6299,11 @@ components:
description: >-
Must be "system" to identify this as a system message
content:
- $ref: '#/components/schemas/InterleavedContent'
+ oneOf:
+ - type: string
+ - type: array
+ items:
+ $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: >-
The content of the "system prompt". If multiple system messages are provided,
they are concatenated. The underlying Llama Stack code may also add other
@@ -6155,7 +6333,11 @@ components:
description: >-
Unique identifier for the tool call this response is for
content:
- $ref: '#/components/schemas/InterleavedContent'
+ oneOf:
+ - type: string
+ - type: array
+ items:
+ $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: The response content from the tool
additionalProperties: false
required:
@@ -6176,7 +6358,11 @@ components:
description: >-
Must be "user" to identify this as a user message
content:
- $ref: '#/components/schemas/InterleavedContent'
+ oneOf:
+ - type: string
+ - type: array
+ items:
+ $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: >-
The content of the message, which can include text and other media
name:
@@ -6262,9 +6448,7 @@ components:
description: >-
(Optional) The penalty for repeated tokens
response_format:
- type: object
- additionalProperties:
- type: string
+ $ref: '#/components/schemas/OpenAIResponseFormatParam'
description: (Optional) The response format to use
seed:
type: integer
@@ -6370,6 +6554,41 @@ components:
title: OpenAIChatCompletion
description: >-
Response from an OpenAI-compatible chat completion request.
+ OpenAIChatCompletionChunk:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The ID of the chat completion
+ choices:
+ type: array
+ items:
+ $ref: '#/components/schemas/OpenAIChunkChoice'
+ description: List of choices
+ object:
+ type: string
+ const: chat.completion.chunk
+ default: chat.completion.chunk
+ description: >-
+ The object type, which will be "chat.completion.chunk"
+ created:
+ type: integer
+ description: >-
+ The Unix timestamp in seconds when the chat completion was created
+ model:
+ type: string
+ description: >-
+ The model that was used to generate the chat completion
+ additionalProperties: false
+ required:
+ - id
+ - choices
+ - object
+ - created
+ - model
+ title: OpenAIChatCompletionChunk
+ description: >-
+ Chunk from a streaming response to an OpenAI-compatible chat completion request.
OpenAIChoice:
type: object
properties:
@@ -6381,8 +6600,11 @@ components:
description: The reason the model stopped generating
index:
type: integer
+ description: The index of the choice
logprobs:
$ref: '#/components/schemas/OpenAIChoiceLogprobs'
+ description: >-
+ (Optional) The log probabilities for the tokens in the message
additionalProperties: false
required:
- message
@@ -6391,6 +6613,27 @@ components:
title: OpenAIChoice
description: >-
A choice from an OpenAI-compatible chat completion response.
+ OpenAIChoiceDelta:
+ type: object
+ properties:
+ content:
+ type: string
+ description: (Optional) The content of the delta
+ refusal:
+ type: string
+ description: (Optional) The refusal of the delta
+ role:
+ type: string
+ description: (Optional) The role of the delta
+ tool_calls:
+ type: array
+ items:
+ $ref: '#/components/schemas/OpenAIChatCompletionToolCall'
+ description: (Optional) The tool calls of the delta
+ additionalProperties: false
+ title: OpenAIChoiceDelta
+ description: >-
+ A delta from an OpenAI-compatible chat completion streaming response.
OpenAIChoiceLogprobs:
type: object
properties:
@@ -6398,15 +6641,43 @@ components:
type: array
items:
$ref: '#/components/schemas/OpenAITokenLogProb'
+ description: >-
+ (Optional) The log probabilities for the tokens in the message
refusal:
type: array
items:
$ref: '#/components/schemas/OpenAITokenLogProb'
+ description: >-
+ (Optional) The log probabilities for the tokens in the message
additionalProperties: false
title: OpenAIChoiceLogprobs
description: >-
The log probabilities for the tokens in the message from an OpenAI-compatible
chat completion response.
+ OpenAIChunkChoice:
+ type: object
+ properties:
+ delta:
+ $ref: '#/components/schemas/OpenAIChoiceDelta'
+ description: The delta from the chunk
+ finish_reason:
+ type: string
+ description: The reason the model stopped generating
+ index:
+ type: integer
+ description: The index of the choice
+ logprobs:
+ $ref: '#/components/schemas/OpenAIChoiceLogprobs'
+ description: >-
+ (Optional) The log probabilities for the tokens in the message
+ additionalProperties: false
+ required:
+ - delta
+ - finish_reason
+ - index
+ title: OpenAIChunkChoice
+ description: >-
+ A chunk choice from an OpenAI-compatible chat completion streaming response.
OpenAITokenLogProb:
type: object
properties:
diff --git a/docs/source/distributions/ondevice_distro/android_sdk.md b/docs/source/distributions/ondevice_distro/android_sdk.md
index 4fa6eaf70..a097a2adf 100644
--- a/docs/source/distributions/ondevice_distro/android_sdk.md
+++ b/docs/source/distributions/ondevice_distro/android_sdk.md
@@ -24,7 +24,7 @@ The key files in the app are `ExampleLlamaStackLocalInference.kt`, `ExampleLlama
Add the following dependency in your `build.gradle.kts` file:
```
dependencies {
- implementation("com.llama.llamastack:llama-stack-client-kotlin:0.1.4.2")
+ implementation("com.llama.llamastack:llama-stack-client-kotlin:0.2.2")
}
```
This will download jar files in your gradle cache in a directory like `~/.gradle/caches/modules-2/files-2.1/com.llama.llamastack/`
@@ -37,11 +37,7 @@ For local inferencing, it is required to include the ExecuTorch library into you
Include the ExecuTorch library by:
1. Download the `download-prebuilt-et-lib.sh` script file from the [llama-stack-client-kotlin-client-local](https://github.com/meta-llama/llama-stack-client-kotlin/tree/latest-release/llama-stack-client-kotlin-client-local/download-prebuilt-et-lib.sh) directory to your local machine.
-2. Move the script to the top level of your Android app where the app directory resides:
-
-
-
-
+2. Move the script to the top level of your Android app where the `app` directory resides.
3. Run `sh download-prebuilt-et-lib.sh` to create an `app/libs` directory and download the `executorch.aar` in that path. This generates an ExecuTorch library for the XNNPACK delegate.
4. Add the `executorch.aar` dependency in your `build.gradle.kts` file:
```
@@ -52,6 +48,8 @@ dependencies {
}
```
+See the Android app [README](https://github.com/meta-llama/llama-stack-client-kotlin/tree/latest-release/examples/android_app#quick-start) for the other dependencies required for local RAG in the Android app.
+
## Llama Stack APIs in Your Android App
Breaking down the demo app, this section will show the core pieces that are used to initialize and run inference with Llama Stack using the Kotlin library.
@@ -60,7 +58,7 @@ Start a Llama Stack server on localhost. Here is an example of how you can do th
```
conda create -n stack-fireworks python=3.10
conda activate stack-fireworks
-pip install --no-cache llama-stack==0.1.4
+pip install --no-cache llama-stack==0.2.2
llama stack build --template fireworks --image-type conda
export FIREWORKS_API_KEY=
llama stack run fireworks --port 5050
diff --git a/docs/source/distributions/self_hosted_distro/groq.md b/docs/source/distributions/self_hosted_distro/groq.md
index 4f5a8a859..b18be1b2f 100644
--- a/docs/source/distributions/self_hosted_distro/groq.md
+++ b/docs/source/distributions/self_hosted_distro/groq.md
@@ -43,7 +43,9 @@ The following models are available by default:
- `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)`
- `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `groq/llama-4-scout-17b-16e-instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)`
+- `groq/meta-llama/llama-4-scout-17b-16e-instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)`
- `groq/llama-4-maverick-17b-128e-instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)`
+- `groq/meta-llama/llama-4-maverick-17b-128e-instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)`
### Prerequisite: API Keys
diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md
index e18b5bf40..efa443778 100644
--- a/docs/source/distributions/self_hosted_distro/remote-vllm.md
+++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md
@@ -41,7 +41,7 @@ The following environment variables can be configured:
## Setting up vLLM server
-In the following sections, we'll use either AMD and NVIDIA GPUs to serve as hardware accelerators for the vLLM
+In the following sections, we'll use AMD, NVIDIA, or Intel GPUs as hardware accelerators for the vLLM
server, which acts as both the LLM inference provider and the safety provider. Note that vLLM also
[supports many other hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html) and
that we only use GPUs here for demonstration purposes.
@@ -162,6 +162,55 @@ docker run \
--port $SAFETY_PORT
```
+### Setting up vLLM server on Intel GPU
+
+Refer to the [vLLM Documentation for XPU](https://docs.vllm.ai/en/v0.8.2/getting_started/installation/gpu.html?device=xpu) to get a vLLM endpoint. That guide covers installing vLLM from source or building your own vLLM Docker container; in addition, Intel provides a prebuilt vLLM container for systems with Intel GPUs supported by the PyTorch XPU backend:
+- [intel/vllm](https://hub.docker.com/r/intel/vllm)
+
+Here is a sample script to start a vLLM server locally via Docker using the Intel-provided container:
+
+```bash
+export INFERENCE_PORT=8000
+export INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct
+export ZE_AFFINITY_MASK=0
+
+docker run \
+ --pull always \
+ --device /dev/dri \
+ -v /dev/dri/by-path:/dev/dri/by-path \
+ -v ~/.cache/huggingface:/root/.cache/huggingface \
+ --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
+ --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \
+ -p $INFERENCE_PORT:$INFERENCE_PORT \
+ --ipc=host \
+ intel/vllm:xpu \
+ --gpu-memory-utilization 0.7 \
+ --model $INFERENCE_MODEL \
+ --port $INFERENCE_PORT
+```
+
+If you are using the Llama Stack Safety / Shield APIs, you will also need to run another vLLM instance serving a corresponding safety model such as `meta-llama/Llama-Guard-3-1B`, using a script like:
+
+```bash
+export SAFETY_PORT=8081
+export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+export ZE_AFFINITY_MASK=1
+
+docker run \
+ --pull always \
+ --device /dev/dri \
+ -v /dev/dri/by-path:/dev/dri/by-path \
+ -v ~/.cache/huggingface:/root/.cache/huggingface \
+ --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
+ --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \
+ -p $SAFETY_PORT:$SAFETY_PORT \
+ --ipc=host \
+ intel/vllm:xpu \
+ --gpu-memory-utilization 0.7 \
+ --model $SAFETY_MODEL \
+ --port $SAFETY_PORT
+```
+
## Running Llama Stack
Now you are ready to run Llama Stack with vLLM as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image.
diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py
index 21753ca23..596efb136 100644
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -18,7 +18,7 @@ from typing import (
)
from pydantic import BaseModel, Field, field_validator
-from typing_extensions import Annotated
+from typing_extensions import Annotated, TypedDict
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem
from llama_stack.apis.models import Model
@@ -442,6 +442,37 @@ class EmbeddingsResponse(BaseModel):
embeddings: List[List[float]]
+@json_schema_type
+class OpenAIChatCompletionContentPartTextParam(BaseModel):
+ type: Literal["text"] = "text"
+ text: str
+
+
+@json_schema_type
+class OpenAIImageURL(BaseModel):
+ url: str
+ detail: Optional[str] = None
+
+
+@json_schema_type
+class OpenAIChatCompletionContentPartImageParam(BaseModel):
+ type: Literal["image_url"] = "image_url"
+ image_url: OpenAIImageURL
+
+
+OpenAIChatCompletionContentPartParam = Annotated[
+ Union[
+ OpenAIChatCompletionContentPartTextParam,
+ OpenAIChatCompletionContentPartImageParam,
+ ],
+ Field(discriminator="type"),
+]
+register_schema(OpenAIChatCompletionContentPartParam, name="OpenAIChatCompletionContentPartParam")
+
+
+OpenAIChatCompletionMessageContent = Union[str, List[OpenAIChatCompletionContentPartParam]]
+
+
@json_schema_type
class OpenAIUserMessageParam(BaseModel):
"""A message from the user in an OpenAI-compatible chat completion request.
@@ -452,7 +483,7 @@ class OpenAIUserMessageParam(BaseModel):
"""
role: Literal["user"] = "user"
- content: InterleavedContent
+ content: OpenAIChatCompletionMessageContent
name: Optional[str] = None
@@ -466,10 +497,24 @@ class OpenAISystemMessageParam(BaseModel):
"""
role: Literal["system"] = "system"
- content: InterleavedContent
+ content: OpenAIChatCompletionMessageContent
name: Optional[str] = None
+@json_schema_type
+class OpenAIChatCompletionToolCallFunction(BaseModel):
+ name: Optional[str] = None
+ arguments: Optional[str] = None
+
+
+@json_schema_type
+class OpenAIChatCompletionToolCall(BaseModel):
+ index: Optional[int] = None
+ id: Optional[str] = None
+ type: Literal["function"] = "function"
+ function: Optional[OpenAIChatCompletionToolCallFunction] = None
+
+
@json_schema_type
class OpenAIAssistantMessageParam(BaseModel):
"""A message containing the model's (assistant) response in an OpenAI-compatible chat completion request.
@@ -477,13 +522,13 @@ class OpenAIAssistantMessageParam(BaseModel):
:param role: Must be "assistant" to identify this as the model's response
:param content: The content of the model's response
:param name: (Optional) The name of the assistant message participant.
- :param tool_calls: List of tool calls. Each tool call is a ToolCall object.
+ :param tool_calls: List of tool calls. Each tool call is an OpenAIChatCompletionToolCall object.
"""
role: Literal["assistant"] = "assistant"
- content: InterleavedContent
+ content: OpenAIChatCompletionMessageContent
name: Optional[str] = None
- tool_calls: Optional[List[ToolCall]] = Field(default_factory=list)
+ tool_calls: Optional[List[OpenAIChatCompletionToolCall]] = Field(default_factory=list)
@json_schema_type
@@ -497,7 +542,7 @@ class OpenAIToolMessageParam(BaseModel):
role: Literal["tool"] = "tool"
tool_call_id: str
- content: InterleavedContent
+ content: OpenAIChatCompletionMessageContent
@json_schema_type
@@ -510,7 +555,7 @@ class OpenAIDeveloperMessageParam(BaseModel):
"""
role: Literal["developer"] = "developer"
- content: InterleavedContent
+ content: OpenAIChatCompletionMessageContent
name: Optional[str] = None
@@ -527,6 +572,46 @@ OpenAIMessageParam = Annotated[
register_schema(OpenAIMessageParam, name="OpenAIMessageParam")
+@json_schema_type
+class OpenAIResponseFormatText(BaseModel):
+ type: Literal["text"] = "text"
+
+
+@json_schema_type
+class OpenAIJSONSchema(TypedDict, total=False):
+ name: str
+ description: Optional[str] = None
+ strict: Optional[bool] = None
+
+ # Pydantic BaseModel cannot be used with a schema param, since it already
+ # has one. And, we don't want to alias here because then have to handle
+ # that alias when converting to OpenAI params. So, to support schema,
+ # we use a TypedDict.
+ schema: Optional[Dict[str, Any]] = None
+
+
+@json_schema_type
+class OpenAIResponseFormatJSONSchema(BaseModel):
+ type: Literal["json_schema"] = "json_schema"
+ json_schema: OpenAIJSONSchema
+
+
+@json_schema_type
+class OpenAIResponseFormatJSONObject(BaseModel):
+ type: Literal["json_object"] = "json_object"
+
+
+OpenAIResponseFormatParam = Annotated[
+ Union[
+ OpenAIResponseFormatText,
+ OpenAIResponseFormatJSONSchema,
+ OpenAIResponseFormatJSONObject,
+ ],
+ Field(discriminator="type"),
+]
+register_schema(OpenAIResponseFormatParam, name="OpenAIResponseFormatParam")
+
+
@json_schema_type
class OpenAITopLogProb(BaseModel):
"""The top log probability for a token from an OpenAI-compatible chat completion response.
@@ -561,22 +646,54 @@ class OpenAITokenLogProb(BaseModel):
class OpenAIChoiceLogprobs(BaseModel):
"""The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response.
- :content: (Optional) The log probabilities for the tokens in the message
- :refusal: (Optional) The log probabilities for the tokens in the message
+ :param content: (Optional) The log probabilities for the tokens in the message
+ :param refusal: (Optional) The log probabilities for the tokens in the message
"""
content: Optional[List[OpenAITokenLogProb]] = None
refusal: Optional[List[OpenAITokenLogProb]] = None
+@json_schema_type
+class OpenAIChoiceDelta(BaseModel):
+ """A delta from an OpenAI-compatible chat completion streaming response.
+
+ :param content: (Optional) The content of the delta
+ :param refusal: (Optional) The refusal of the delta
+ :param role: (Optional) The role of the delta
+ :param tool_calls: (Optional) The tool calls of the delta
+ """
+
+ content: Optional[str] = None
+ refusal: Optional[str] = None
+ role: Optional[str] = None
+ tool_calls: Optional[List[OpenAIChatCompletionToolCall]] = None
+
+
+@json_schema_type
+class OpenAIChunkChoice(BaseModel):
+ """A chunk choice from an OpenAI-compatible chat completion streaming response.
+
+ :param delta: The delta from the chunk
+ :param finish_reason: The reason the model stopped generating
+ :param index: The index of the choice
+ :param logprobs: (Optional) The log probabilities for the tokens in the message
+ """
+
+ delta: OpenAIChoiceDelta
+ finish_reason: str
+ index: int
+ logprobs: Optional[OpenAIChoiceLogprobs] = None
+
+
@json_schema_type
class OpenAIChoice(BaseModel):
"""A choice from an OpenAI-compatible chat completion response.
:param message: The message from the model
:param finish_reason: The reason the model stopped generating
- :index: The index of the choice
- :logprobs: (Optional) The log probabilities for the tokens in the message
+ :param index: The index of the choice
+ :param logprobs: (Optional) The log probabilities for the tokens in the message
"""
message: OpenAIMessageParam
@@ -603,6 +720,24 @@ class OpenAIChatCompletion(BaseModel):
model: str
+@json_schema_type
+class OpenAIChatCompletionChunk(BaseModel):
+ """Chunk from a streaming response to an OpenAI-compatible chat completion request.
+
+ :param id: The ID of the chat completion
+ :param choices: List of choices
+ :param object: The object type, which will be "chat.completion.chunk"
+ :param created: The Unix timestamp in seconds when the chat completion was created
+ :param model: The model that was used to generate the chat completion
+ """
+
+ id: str
+ choices: List[OpenAIChunkChoice]
+ object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
+ created: int
+ model: str
+
+
@json_schema_type
class OpenAICompletionLogprobs(BaseModel):
"""The log probabilities for the tokens in the message from an OpenAI-compatible completion response.
@@ -872,7 +1007,7 @@ class Inference(Protocol):
n: Optional[int] = None,
parallel_tool_calls: Optional[bool] = None,
presence_penalty: Optional[float] = None,
- response_format: Optional[Dict[str, str]] = None,
+ response_format: Optional[OpenAIResponseFormatParam] = None,
seed: Optional[int] = None,
stop: Optional[Union[str, List[str]]] = None,
stream: Optional[bool] = None,
@@ -883,7 +1018,7 @@ class Inference(Protocol):
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
user: Optional[str] = None,
- ) -> OpenAIChatCompletion:
+ ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
"""Generate an OpenAI-compatible chat completion for the given messages using the specified model.
:param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
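
To make the new request-side types above concrete, here is a small sketch (using only the models defined in this file; the message text, image URL, and schema are made-up placeholders) of how a caller could assemble a multi-part user message and a JSON-schema response format:

```python
from llama_stack.apis.inference.inference import (
    OpenAIChatCompletionContentPartImageParam,
    OpenAIChatCompletionContentPartTextParam,
    OpenAIImageURL,
    OpenAIJSONSchema,
    OpenAIResponseFormatJSONSchema,
    OpenAIUserMessageParam,
)

# A user message whose content is a list of content parts (text + image)
# rather than a plain string.
message = OpenAIUserMessageParam(
    content=[
        OpenAIChatCompletionContentPartTextParam(text="What is in this image?"),
        OpenAIChatCompletionContentPartImageParam(
            image_url=OpenAIImageURL(url="https://example.com/cat.png", detail="low"),
        ),
    ],
)

# A response_format asking for JSON that conforms to a named schema.
# OpenAIJSONSchema is a TypedDict, so it is constructed like a dict.
response_format = OpenAIResponseFormatJSONSchema(
    json_schema=OpenAIJSONSchema(
        name="animal_description",
        schema={"type": "object", "properties": {"animal": {"type": "string"}}},
    ),
)
```

These two values are what `openai_chat_completion` now accepts for `messages` entries and for `response_format`, in place of the previous `InterleavedContent` and `Dict[str, str]` types.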
diff --git a/llama_stack/apis/inspect/inspect.py b/llama_stack/apis/inspect/inspect.py
index 3896d67a9..863f90e14 100644
--- a/llama_stack/apis/inspect/inspect.py
+++ b/llama_stack/apis/inspect/inspect.py
@@ -8,6 +8,7 @@ from typing import List, Protocol, runtime_checkable
from pydantic import BaseModel
+from llama_stack.providers.datatypes import HealthStatus
from llama_stack.schema_utils import json_schema_type, webmethod
@@ -20,8 +21,7 @@ class RouteInfo(BaseModel):
@json_schema_type
class HealthInfo(BaseModel):
- status: str
- # TODO: add a provider level status
+ status: HealthStatus
@json_schema_type
diff --git a/llama_stack/apis/providers/providers.py b/llama_stack/apis/providers/providers.py
index 83d03d7c1..ea5f968ec 100644
--- a/llama_stack/apis/providers/providers.py
+++ b/llama_stack/apis/providers/providers.py
@@ -8,6 +8,7 @@ from typing import Any, Dict, List, Protocol, runtime_checkable
from pydantic import BaseModel
+from llama_stack.providers.datatypes import HealthResponse
from llama_stack.schema_utils import json_schema_type, webmethod
@@ -17,6 +18,7 @@ class ProviderInfo(BaseModel):
provider_id: str
provider_type: str
config: Dict[str, Any]
+ health: HealthResponse
class ListProvidersResponse(BaseModel):
diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py
index ac1933e0e..3251bc632 100644
--- a/llama_stack/cli/stack/_build.py
+++ b/llama_stack/cli/stack/_build.py
@@ -89,6 +89,43 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
color="red",
)
sys.exit(1)
+ elif args.providers:
+ providers = dict()
+ for api_provider in args.providers.split(","):
+ if "=" not in api_provider:
+ cprint(
+ "Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
+ color="red",
+ )
+ sys.exit(1)
+ api, provider = api_provider.split("=")
+ providers_for_api = get_provider_registry().get(Api(api), None)
+ if providers_for_api is None:
+ cprint(
+ f"{api} is not a valid API.",
+ color="red",
+ )
+ sys.exit(1)
+ if provider in providers_for_api:
+ providers.setdefault(api, []).append(provider)
+ else:
+ cprint(
+ f"{provider} is not a valid provider for the {api} API.",
+ color="red",
+ )
+ sys.exit(1)
+ distribution_spec = DistributionSpec(
+ providers=providers,
+ description=",".join(args.providers),
+ )
+ if not args.image_type:
+ cprint(
+                f"Please specify an image-type (container | conda | venv) for {args.providers}",
+ color="red",
+ )
+ sys.exit(1)
+
+ build_config = BuildConfig(image_type=args.image_type, distribution_spec=distribution_spec)
elif not args.config and not args.template:
name = prompt(
"> Enter a name for your Llama Stack (e.g. my-local-stack): ",
diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py
index c511a0682..93e7d9b22 100644
--- a/llama_stack/cli/stack/build.py
+++ b/llama_stack/cli/stack/build.py
@@ -75,6 +75,12 @@ the build. If not specified, currently active environment will be used if found.
default=False,
help="Run the stack after building using the same image type, name, and other applicable arguments",
)
+ self.parser.add_argument(
+ "--providers",
+ type=str,
+ default=None,
+            help="Build a config for a list of providers and only those providers. The list is formatted as api1=provider1,api2=provider2, and multiple providers can be specified per API.",
+ )
def _run_stack_build_command(self, args: argparse.Namespace) -> None:
# always keep implementation completely silo-ed away from CLI so CLI
diff --git a/llama_stack/distribution/inspect.py b/llama_stack/distribution/inspect.py
index ba0ce5ea2..23f644ec6 100644
--- a/llama_stack/distribution/inspect.py
+++ b/llama_stack/distribution/inspect.py
@@ -17,6 +17,7 @@ from llama_stack.apis.inspect import (
)
from llama_stack.distribution.datatypes import StackRunConfig
from llama_stack.distribution.server.endpoints import get_all_api_endpoints
+from llama_stack.providers.datatypes import HealthStatus
class DistributionInspectConfig(BaseModel):
@@ -58,7 +59,7 @@ class DistributionInspectImpl(Inspect):
return ListRoutesResponse(data=ret)
async def health(self) -> HealthInfo:
- return HealthInfo(status="OK")
+ return HealthInfo(status=HealthStatus.OK)
async def version(self) -> VersionInfo:
return VersionInfo(version=version("llama-stack"))
diff --git a/llama_stack/distribution/library_client.py b/llama_stack/distribution/library_client.py
index c0143363d..f426bcafe 100644
--- a/llama_stack/distribution/library_client.py
+++ b/llama_stack/distribution/library_client.py
@@ -43,9 +43,9 @@ from llama_stack.distribution.server.endpoints import (
from llama_stack.distribution.stack import (
construct_stack,
get_stack_run_config_from_template,
- redact_sensitive_fields,
replace_env_vars,
)
+from llama_stack.distribution.utils.config import redact_sensitive_fields
from llama_stack.distribution.utils.context import preserve_contexts_async_generator
from llama_stack.distribution.utils.exec import in_notebook
from llama_stack.providers.utils.telemetry.tracing import (
diff --git a/llama_stack/distribution/providers.py b/llama_stack/distribution/providers.py
index cf9b0b975..1c00ce264 100644
--- a/llama_stack/distribution/providers.py
+++ b/llama_stack/distribution/providers.py
@@ -4,14 +4,17 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+import asyncio
+from typing import Any, Dict
from pydantic import BaseModel
from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers
from llama_stack.log import get_logger
+from llama_stack.providers.datatypes import HealthResponse, HealthStatus
from .datatypes import StackRunConfig
-from .stack import redact_sensitive_fields
+from .utils.config import redact_sensitive_fields
logger = get_logger(name=__name__, category="core")
@@ -41,19 +44,24 @@ class ProviderImpl(Providers):
async def list_providers(self) -> ListProvidersResponse:
run_config = self.config.run_config
safe_config = StackRunConfig(**redact_sensitive_fields(run_config.model_dump()))
+ providers_health = await self.get_providers_health()
ret = []
for api, providers in safe_config.providers.items():
- ret.extend(
- [
+ for p in providers:
+ ret.append(
ProviderInfo(
api=api,
provider_id=p.provider_id,
provider_type=p.provider_type,
config=p.config,
+ health=providers_health.get(api, {}).get(
+ p.provider_id,
+ HealthResponse(
+ status=HealthStatus.NOT_IMPLEMENTED, message="Provider does not implement health check"
+ ),
+ ),
)
- for p in providers
- ]
- )
+ )
return ListProvidersResponse(data=ret)
@@ -64,3 +72,57 @@ class ProviderImpl(Providers):
return p
raise ValueError(f"Provider {provider_id} not found")
+
+ async def get_providers_health(self) -> Dict[str, Dict[str, HealthResponse]]:
+ """Get health status for all providers.
+
+ Returns:
+ Dict[str, Dict[str, HealthResponse]]: A dictionary mapping API names to provider health statuses.
+ Each API maps to a dictionary of provider IDs to their health responses.
+ """
+ providers_health: Dict[str, Dict[str, HealthResponse]] = {}
+ timeout = 1.0
+
+ async def check_provider_health(impl: Any) -> tuple[str, HealthResponse] | None:
+ # Skip special implementations (inspect/providers) that don't have provider specs
+ if not hasattr(impl, "__provider_spec__"):
+ return None
+ api_name = impl.__provider_spec__.api.name
+ if not hasattr(impl, "health"):
+ return (
+ api_name,
+ HealthResponse(
+ status=HealthStatus.NOT_IMPLEMENTED, message="Provider does not implement health check"
+ ),
+ )
+
+ try:
+ health = await asyncio.wait_for(impl.health(), timeout=timeout)
+ return api_name, health
+ except asyncio.TimeoutError:
+ return (
+ api_name,
+ HealthResponse(
+ status=HealthStatus.ERROR, message=f"Health check timed out after {timeout} seconds"
+ ),
+ )
+ except Exception as e:
+ return (
+ api_name,
+ HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}"),
+ )
+
+ # Create tasks for all providers
+ tasks = [check_provider_health(impl) for impl in self.deps.values()]
+
+ # Wait for all health checks to complete
+ results = await asyncio.gather(*tasks)
+
+ # Organize results by API and provider ID
+ for result in results:
+ if result is None: # Skip special implementations
+ continue
+ api_name, health_response = result
+ providers_health[api_name] = health_response
+
+ return providers_health
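
The aggregation above only queries implementations that expose a `health()` coroutine. For reference, a provider-side implementation might look like the following sketch (the class and the ping call are hypothetical; only `HealthResponse` and `HealthStatus` come from `llama_stack.providers.datatypes`):

```python
from llama_stack.providers.datatypes import HealthResponse, HealthStatus


class ExampleInferenceAdapter:
    """Hypothetical adapter showing the health() contract this file relies on."""

    async def _ping_backend(self) -> None:
        # Placeholder for a real connectivity check against the backing service.
        pass

    async def health(self) -> HealthResponse:
        try:
            await self._ping_backend()
            return HealthResponse(status=HealthStatus.OK)
        except Exception as e:
            return HealthResponse(
                status=HealthStatus.ERROR,
                message=f"Health check failed: {str(e)}",
            )
```

Providers that omit `health()` are reported with `HealthStatus.NOT_IMPLEMENTED`, as handled in `check_provider_health` above.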
diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py
index 0de1e0a02..e9a594eba 100644
--- a/llama_stack/distribution/resolver.py
+++ b/llama_stack/distribution/resolver.py
@@ -41,7 +41,6 @@ from llama_stack.providers.datatypes import (
Api,
BenchmarksProtocolPrivate,
DatasetsProtocolPrivate,
- InlineProviderSpec,
ModelsProtocolPrivate,
ProviderSpec,
RemoteProviderConfig,
@@ -230,46 +229,6 @@ def sort_providers_by_deps(
{k: list(v.values()) for k, v in providers_with_specs.items()}
)
- # Append built-in "inspect" provider
- apis = [x[1].spec.api for x in sorted_providers]
- sorted_providers.append(
- (
- "inspect",
- ProviderWithSpec(
- provider_id="__builtin__",
- provider_type="__builtin__",
- config={"run_config": run_config.model_dump()},
- spec=InlineProviderSpec(
- api=Api.inspect,
- provider_type="__builtin__",
- config_class="llama_stack.distribution.inspect.DistributionInspectConfig",
- module="llama_stack.distribution.inspect",
- api_dependencies=apis,
- deps__=[x.value for x in apis],
- ),
- ),
- )
- )
-
- sorted_providers.append(
- (
- "providers",
- ProviderWithSpec(
- provider_id="__builtin__",
- provider_type="__builtin__",
- config={"run_config": run_config.model_dump()},
- spec=InlineProviderSpec(
- api=Api.providers,
- provider_type="__builtin__",
- config_class="llama_stack.distribution.providers.ProviderImplConfig",
- module="llama_stack.distribution.providers",
- api_dependencies=apis,
- deps__=[x.value for x in apis],
- ),
- ),
- )
- )
-
logger.debug(f"Resolved {len(sorted_providers)} providers")
for api_str, provider in sorted_providers:
logger.debug(f" {api_str} => {provider.provider_id}")
diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py
index b9623ef3c..17aecdaf8 100644
--- a/llama_stack/distribution/routers/routers.py
+++ b/llama_stack/distribution/routers/routers.py
@@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+import asyncio
import time
from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union
@@ -37,7 +38,13 @@ from llama_stack.apis.inference import (
ToolDefinition,
ToolPromptFormat,
)
-from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam
+from llama_stack.apis.inference.inference import (
+ OpenAIChatCompletion,
+ OpenAIChatCompletionChunk,
+ OpenAICompletion,
+ OpenAIMessageParam,
+ OpenAIResponseFormatParam,
+)
from llama_stack.apis.models import Model, ModelType
from llama_stack.apis.safety import RunShieldResponse, Safety
from llama_stack.apis.scoring import (
@@ -60,7 +67,7 @@ from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.log import get_logger
from llama_stack.models.llama.llama3.chat_format import ChatFormat
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack.providers.datatypes import RoutingTable
+from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
from llama_stack.providers.utils.telemetry.tracing import get_current_span
logger = get_logger(name=__name__, category="core")
@@ -530,7 +537,7 @@ class InferenceRouter(Inference):
n: Optional[int] = None,
parallel_tool_calls: Optional[bool] = None,
presence_penalty: Optional[float] = None,
- response_format: Optional[Dict[str, str]] = None,
+ response_format: Optional[OpenAIResponseFormatParam] = None,
seed: Optional[int] = None,
stop: Optional[Union[str, List[str]]] = None,
stream: Optional[bool] = None,
@@ -541,7 +548,7 @@ class InferenceRouter(Inference):
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
user: Optional[str] = None,
- ) -> OpenAIChatCompletion:
+ ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
logger.debug(
f"InferenceRouter.openai_chat_completion: {model=}, {stream=}, {messages=}",
)
@@ -580,6 +587,29 @@ class InferenceRouter(Inference):
provider = self.routing_table.get_provider_impl(model_obj.identifier)
return await provider.openai_chat_completion(**params)
+ async def health(self) -> Dict[str, HealthResponse]:
+ health_statuses = {}
+ timeout = 0.5
+ for provider_id, impl in self.routing_table.impls_by_provider_id.items():
+ try:
+ # check if the provider has a health method
+ if not hasattr(impl, "health"):
+ continue
+ health = await asyncio.wait_for(impl.health(), timeout=timeout)
+ health_statuses[provider_id] = health
+ except asyncio.TimeoutError:
+ health_statuses[provider_id] = HealthResponse(
+ status=HealthStatus.ERROR,
+ message=f"Health check timed out after {timeout} seconds",
+ )
+ except NotImplementedError:
+ health_statuses[provider_id] = HealthResponse(status=HealthStatus.NOT_IMPLEMENTED)
+ except Exception as e:
+ health_statuses[provider_id] = HealthResponse(
+ status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}"
+ )
+ return health_statuses
+
class SafetyRouter(Safety):
def __init__(
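
For illustration only (not part of this patch), a minimal sketch of how the timeout-guarded health aggregation added to `InferenceRouter` behaves, using hypothetical stand-in providers; `HealthResponse` and `HealthStatus` are the types introduced in `llama_stack/providers/datatypes.py` later in this change:

```python
import asyncio

from llama_stack.providers.datatypes import HealthResponse, HealthStatus


class FastProvider:
    async def health(self) -> HealthResponse:
        return HealthResponse(status=HealthStatus.OK)


class SlowProvider:
    async def health(self) -> HealthResponse:
        await asyncio.sleep(2)  # exceeds the router's 0.5 second budget
        return HealthResponse(status=HealthStatus.OK)


async def aggregate(impls: dict) -> dict:
    # Same pattern as InferenceRouter.health(): bound each provider check with a timeout
    statuses = {}
    for provider_id, impl in impls.items():
        try:
            statuses[provider_id] = await asyncio.wait_for(impl.health(), timeout=0.5)
        except asyncio.TimeoutError:
            statuses[provider_id] = HealthResponse(
                status=HealthStatus.ERROR,
                message="Health check timed out after 0.5 seconds",
            )
    return statuses


print(asyncio.run(aggregate({"fast": FastProvider(), "slow": SlowProvider()})))
# e.g. the "fast" provider reports OK while the "slow" one is marked ERROR due to the timeout
```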
diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py
index 7d4ec2a2f..9bbb2ce88 100644
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@@ -38,10 +38,10 @@ from llama_stack.distribution.server.endpoints import (
)
from llama_stack.distribution.stack import (
construct_stack,
- redact_sensitive_fields,
replace_env_vars,
validate_env_pair,
)
+from llama_stack.distribution.utils.config import redact_sensitive_fields
from llama_stack.distribution.utils.context import preserve_contexts_async_generator
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api
@@ -229,15 +229,30 @@ class TracingMiddleware:
def __init__(self, app, impls):
self.app = app
self.impls = impls
+ # FastAPI built-in paths that should bypass custom routing
+ self.fastapi_paths = ("/docs", "/redoc", "/openapi.json", "/favicon.ico", "/static")
async def __call__(self, scope, receive, send):
if scope.get("type") == "lifespan":
return await self.app(scope, receive, send)
path = scope.get("path", "")
+
+ # Check if the path is a FastAPI built-in path
+ if path.startswith(self.fastapi_paths):
+ # Pass through to FastAPI's built-in handlers
+ logger.debug(f"Bypassing custom routing for FastAPI built-in path: {path}")
+ return await self.app(scope, receive, send)
+
if not hasattr(self, "endpoint_impls"):
self.endpoint_impls = initialize_endpoint_impls(self.impls)
- _, _, trace_path = find_matching_endpoint(scope.get("method", "GET"), path, self.endpoint_impls)
+
+ try:
+ _, _, trace_path = find_matching_endpoint(scope.get("method", "GET"), path, self.endpoint_impls)
+ except ValueError:
+ # If no matching endpoint is found, pass through to FastAPI
+ logger.debug(f"No matching endpoint found for path: {path}, falling back to FastAPI")
+ return await self.app(scope, receive, send)
trace_context = await start_trace(trace_path, {"__location__": "server", "raw_path": path})
@@ -388,7 +403,12 @@ def main(args: Optional[argparse.Namespace] = None):
safe_config = redact_sensitive_fields(config.model_dump())
logger.info(yaml.dump(safe_config, indent=2))
- app = FastAPI(lifespan=lifespan)
+ app = FastAPI(
+ lifespan=lifespan,
+ docs_url="/docs",
+ redoc_url="/redoc",
+ openapi_url="/openapi.json",
+ )
if not os.environ.get("LLAMA_STACK_DISABLE_VERSION_CHECK"):
app.add_middleware(ClientVersionMiddleware)
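
The bypass in `TracingMiddleware` relies on `str.startswith` accepting a tuple of prefixes; a small stand-alone illustration (paths are hypothetical):

```python
fastapi_paths = ("/docs", "/redoc", "/openapi.json", "/favicon.ico", "/static")

print("/docs/oauth2-redirect".startswith(fastapi_paths))  # True  -> passed straight to FastAPI
print("/v1/providers/ollama".startswith(fastapi_paths))   # False -> routed through the stack's endpoints
```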
diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py
index 08ff5e7cd..a6dc3d2a0 100644
--- a/llama_stack/distribution/stack.py
+++ b/llama_stack/distribution/stack.py
@@ -35,6 +35,8 @@ from llama_stack.apis.vector_dbs import VectorDBs
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.datatypes import Provider, StackRunConfig
from llama_stack.distribution.distribution import get_provider_registry
+from llama_stack.distribution.inspect import DistributionInspectConfig, DistributionInspectImpl
+from llama_stack.distribution.providers import ProviderImpl, ProviderImplConfig
from llama_stack.distribution.resolver import ProviderRegistry, resolve_impls
from llama_stack.distribution.store.registry import create_dist_registry
from llama_stack.distribution.utils.dynamic import instantiate_class_type
@@ -119,26 +121,6 @@ class EnvVarError(Exception):
super().__init__(f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}")
-def redact_sensitive_fields(data: Dict[str, Any]) -> Dict[str, Any]:
- """Redact sensitive information from config before printing."""
- sensitive_patterns = ["api_key", "api_token", "password", "secret"]
-
- def _redact_dict(d: Dict[str, Any]) -> Dict[str, Any]:
- result = {}
- for k, v in d.items():
- if isinstance(v, dict):
- result[k] = _redact_dict(v)
- elif isinstance(v, list):
- result[k] = [_redact_dict(i) if isinstance(i, dict) else i for i in v]
- elif any(pattern in k.lower() for pattern in sensitive_patterns):
- result[k] = "********"
- else:
- result[k] = v
- return result
-
- return _redact_dict(data)
-
-
def replace_env_vars(config: Any, path: str = "") -> Any:
if isinstance(config, dict):
result = {}
@@ -215,6 +197,26 @@ def validate_env_pair(env_pair: str) -> tuple[str, str]:
) from e
+def add_internal_implementations(impls: Dict[Api, Any], run_config: StackRunConfig) -> None:
+ """Add internal implementations (inspect and providers) to the implementations dictionary.
+
+ Args:
+ impls: Dictionary of API implementations
+ run_config: Stack run configuration
+ """
+ inspect_impl = DistributionInspectImpl(
+ DistributionInspectConfig(run_config=run_config),
+ deps=impls,
+ )
+ impls[Api.inspect] = inspect_impl
+
+ providers_impl = ProviderImpl(
+ ProviderImplConfig(run_config=run_config),
+ deps=impls,
+ )
+ impls[Api.providers] = providers_impl
+
+
# Produces a stack of providers for the given run config. Not all APIs may be
# asked for in the run config.
async def construct_stack(
@@ -222,6 +224,10 @@ async def construct_stack(
) -> Dict[Api, Any]:
dist_registry, _ = await create_dist_registry(run_config.metadata_store, run_config.image_name)
impls = await resolve_impls(run_config, provider_registry or get_provider_registry(run_config), dist_registry)
+
+ # Add internal implementations after all other providers are resolved
+ add_internal_implementations(impls, run_config)
+
await register_resources(run_config, impls)
return impls
diff --git a/llama_stack/distribution/ui/page/playground/tools.py b/llama_stack/distribution/ui/page/playground/tools.py
index e987f617b..bc2e8975f 100644
--- a/llama_stack/distribution/ui/page/playground/tools.py
+++ b/llama_stack/distribution/ui/page/playground/tools.py
@@ -56,6 +56,17 @@ def tool_chat_page():
st.subheader(f"Active Tools: 🛠 {len(active_tool_list)}")
st.json(active_tool_list)
+ st.subheader("Chat Configurations")
+ max_tokens = st.slider(
+ "Max Tokens",
+ min_value=0,
+ max_value=4096,
+ value=512,
+ step=1,
+ help="The maximum number of tokens to generate",
+ on_change=reset_agent,
+ )
+
@st.cache_resource
def create_agent():
return Agent(
@@ -63,9 +74,7 @@ def tool_chat_page():
model=model,
instructions="You are a helpful assistant. When you use a tool always respond with a summary of the result.",
tools=toolgroup_selection,
- sampling_params={
- "strategy": {"type": "greedy"},
- },
+ sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens},
)
agent = create_agent()
diff --git a/llama_stack/distribution/utils/config.py b/llama_stack/distribution/utils/config.py
new file mode 100644
index 000000000..5e78289b7
--- /dev/null
+++ b/llama_stack/distribution/utils/config.py
@@ -0,0 +1,30 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Dict
+
+
+def redact_sensitive_fields(data: Dict[str, Any]) -> Dict[str, Any]:
+ """Redact sensitive information from config before printing."""
+ sensitive_patterns = ["api_key", "api_token", "password", "secret"]
+
+ def _redact_value(v: Any) -> Any:
+ if isinstance(v, dict):
+ return _redact_dict(v)
+ elif isinstance(v, list):
+ return [_redact_value(i) for i in v]
+ return v
+
+ def _redact_dict(d: Dict[str, Any]) -> Dict[str, Any]:
+ result = {}
+ for k, v in d.items():
+ if any(pattern in k.lower() for pattern in sensitive_patterns):
+ result[k] = "********"
+ else:
+ result[k] = _redact_value(v)
+ return result
+
+ return _redact_dict(data)
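
A short usage sketch of the relocated helper (the sample config values are made up):

```python
from llama_stack.distribution.utils.config import redact_sensitive_fields

config = {
    "providers": {
        "inference": [
            {"provider_id": "ollama", "config": {"url": "http://localhost:11434"}},
            {"provider_id": "fireworks", "config": {"api_key": "fw-secret-123"}},
        ],
    },
    "metadata_store": {"password": "hunter2"},
}

# Keys matching api_key/api_token/password/secret are masked; everything else passes through unchanged.
print(redact_sensitive_fields(config))
```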
diff --git a/llama_stack/models/llama/llama3/tool_utils.py b/llama_stack/models/llama/llama3/tool_utils.py
index ef39ba0a5..91b46ec98 100644
--- a/llama_stack/models/llama/llama3/tool_utils.py
+++ b/llama_stack/models/llama/llama3/tool_utils.py
@@ -204,7 +204,9 @@ class ToolUtils:
return None
elif is_json(message_body):
response = json.loads(message_body)
- if ("type" in response and response["type"] == "function") or ("name" in response):
+ if ("type" in response and response["type"] == "function") or (
+ "name" in response and "parameters" in response
+ ):
function_name = response["name"]
args = response["parameters"]
return function_name, args
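
For illustration, the tightened condition in isolation (a standalone sketch with hypothetical message bodies):

```python
import json


def looks_like_tool_call(message_body: str) -> bool:
    response = json.loads(message_body)
    return ("type" in response and response["type"] == "function") or (
        "name" in response and "parameters" in response
    )


# Plain JSON output that merely contains a "name" key is no longer mistaken for a tool call,
# which previously led to a KeyError when "parameters" was read.
print(looks_like_tool_call('{"name": "Alice", "age": 30}'))  # False

# A well-formed call carrying both keys still parses as before.
print(looks_like_tool_call('{"name": "get_weather", "parameters": {"city": "Paris"}}'))  # True
```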
diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py
index 32dfba30c..c3141f807 100644
--- a/llama_stack/providers/datatypes.py
+++ b/llama_stack/providers/datatypes.py
@@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+from enum import Enum
from typing import Any, List, Optional, Protocol
from urllib.parse import urlparse
@@ -201,3 +202,12 @@ def remote_provider_spec(
adapter=adapter,
api_dependencies=api_dependencies or [],
)
+
+
+class HealthStatus(str, Enum):
+ OK = "OK"
+ ERROR = "Error"
+ NOT_IMPLEMENTED = "Not Implemented"
+
+
+HealthResponse = dict[str, Any]
diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py
index 0b56ba1f7..2b9a27982 100644
--- a/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -59,8 +59,8 @@ from llama_stack.providers.utils.inference.model_registry import (
build_hf_repo_model_entry,
)
from llama_stack.providers.utils.inference.openai_compat import (
- OpenAIChatCompletionUnsupportedMixin,
- OpenAICompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
+ OpenAICompletionToLlamaStackMixin,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
augment_content_with_response_format_prompt,
@@ -83,8 +83,8 @@ def llama_builder_fn(config: MetaReferenceInferenceConfig, model_id: str, llama_
class MetaReferenceInferenceImpl(
- OpenAICompletionUnsupportedMixin,
- OpenAIChatCompletionUnsupportedMixin,
+ OpenAICompletionToLlamaStackMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
SentenceTransformerEmbeddingMixin,
Inference,
ModelsProtocolPrivate,
diff --git a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
index 5bc20e3c2..d717d055f 100644
--- a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
+++ b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
@@ -25,8 +25,8 @@ from llama_stack.providers.utils.inference.embedding_mixin import (
SentenceTransformerEmbeddingMixin,
)
from llama_stack.providers.utils.inference.openai_compat import (
- OpenAIChatCompletionUnsupportedMixin,
- OpenAICompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
+ OpenAICompletionToLlamaStackMixin,
)
from .config import SentenceTransformersInferenceConfig
@@ -35,8 +35,8 @@ log = logging.getLogger(__name__)
class SentenceTransformersInferenceImpl(
- OpenAIChatCompletionUnsupportedMixin,
- OpenAICompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
+ OpenAICompletionToLlamaStackMixin,
SentenceTransformerEmbeddingMixin,
Inference,
ModelsProtocolPrivate,
diff --git a/llama_stack/providers/inline/inference/vllm/vllm.py b/llama_stack/providers/inline/inference/vllm/vllm.py
index 085c79d6b..9d742c39c 100644
--- a/llama_stack/providers/inline/inference/vllm/vllm.py
+++ b/llama_stack/providers/inline/inference/vllm/vllm.py
@@ -66,10 +66,10 @@ from llama_stack.providers.utils.inference.model_registry import (
ModelsProtocolPrivate,
)
from llama_stack.providers.utils.inference.openai_compat import (
- OpenAIChatCompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
OpenAICompatCompletionChoice,
OpenAICompatCompletionResponse,
- OpenAICompletionUnsupportedMixin,
+ OpenAICompletionToLlamaStackMixin,
get_stop_reason,
process_chat_completion_stream_response,
)
@@ -176,8 +176,8 @@ def _convert_sampling_params(
class VLLMInferenceImpl(
Inference,
- OpenAIChatCompletionUnsupportedMixin,
- OpenAICompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
+ OpenAICompletionToLlamaStackMixin,
ModelsProtocolPrivate,
):
"""
diff --git a/llama_stack/providers/inline/post_training/torchtune/post_training.py b/llama_stack/providers/inline/post_training/torchtune/post_training.py
index 2c129ef41..cc1a6a5fe 100644
--- a/llama_stack/providers/inline/post_training/torchtune/post_training.py
+++ b/llama_stack/providers/inline/post_training/torchtune/post_training.py
@@ -3,13 +3,14 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from datetime import datetime, timezone
+from enum import Enum
from typing import Any, Dict, Optional
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.post_training import (
AlgorithmConfig,
+ Checkpoint,
DPOAlignmentConfig,
JobStatus,
ListPostTrainingJobsResponse,
@@ -25,9 +26,19 @@ from llama_stack.providers.inline.post_training.torchtune.config import (
from llama_stack.providers.inline.post_training.torchtune.recipes.lora_finetuning_single_device import (
LoraFinetuningSingleDevice,
)
+from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
+from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
from llama_stack.schema_utils import webmethod
+class TrainingArtifactType(Enum):
+ CHECKPOINT = "checkpoint"
+ RESOURCES_STATS = "resources_stats"
+
+
+_JOB_TYPE_SUPERVISED_FINE_TUNE = "supervised-fine-tune"
+
+
class TorchtunePostTrainingImpl:
def __init__(
self,
@@ -38,13 +49,27 @@ class TorchtunePostTrainingImpl:
self.config = config
self.datasetio_api = datasetio_api
self.datasets_api = datasets
+ self._scheduler = Scheduler()
- # TODO: assume sync job, will need jobs API for async scheduling
- self.jobs = {}
- self.checkpoints_dict = {}
+ async def shutdown(self) -> None:
+ await self._scheduler.shutdown()
- async def shutdown(self):
- pass
+ @staticmethod
+ def _checkpoint_to_artifact(checkpoint: Checkpoint) -> JobArtifact:
+ return JobArtifact(
+ type=TrainingArtifactType.CHECKPOINT.value,
+ name=checkpoint.identifier,
+ uri=checkpoint.path,
+ metadata=dict(checkpoint),
+ )
+
+ @staticmethod
+ def _resources_stats_to_artifact(resources_stats: Dict[str, Any]) -> JobArtifact:
+ return JobArtifact(
+ type=TrainingArtifactType.RESOURCES_STATS.value,
+ name=TrainingArtifactType.RESOURCES_STATS.value,
+ metadata=resources_stats,
+ )
async def supervised_fine_tune(
self,
@@ -56,20 +81,11 @@ class TorchtunePostTrainingImpl:
checkpoint_dir: Optional[str],
algorithm_config: Optional[AlgorithmConfig],
) -> PostTrainingJob:
- if job_uuid in self.jobs:
- raise ValueError(f"Job {job_uuid} already exists")
-
- post_training_job = PostTrainingJob(job_uuid=job_uuid)
-
- job_status_response = PostTrainingJobStatusResponse(
- job_uuid=job_uuid,
- status=JobStatus.scheduled,
- scheduled_at=datetime.now(timezone.utc),
- )
- self.jobs[job_uuid] = job_status_response
-
if isinstance(algorithm_config, LoraFinetuningConfig):
- try:
+
+ async def handler(on_log_message_cb, on_status_change_cb, on_artifact_collected_cb):
+ on_log_message_cb("Starting Lora finetuning")
+
recipe = LoraFinetuningSingleDevice(
self.config,
job_uuid,
@@ -82,26 +98,22 @@ class TorchtunePostTrainingImpl:
self.datasetio_api,
self.datasets_api,
)
-
- job_status_response.status = JobStatus.in_progress
- job_status_response.started_at = datetime.now(timezone.utc)
-
await recipe.setup()
+
resources_allocated, checkpoints = await recipe.train()
- self.checkpoints_dict[job_uuid] = checkpoints
- job_status_response.resources_allocated = resources_allocated
- job_status_response.checkpoints = checkpoints
- job_status_response.status = JobStatus.completed
- job_status_response.completed_at = datetime.now(timezone.utc)
+ on_artifact_collected_cb(self._resources_stats_to_artifact(resources_allocated))
+ for checkpoint in checkpoints:
+ artifact = self._checkpoint_to_artifact(checkpoint)
+ on_artifact_collected_cb(artifact)
- except Exception:
- job_status_response.status = JobStatus.failed
- raise
+ on_status_change_cb(SchedulerJobStatus.completed)
+ on_log_message_cb("Lora finetuning completed")
else:
raise NotImplementedError()
- return post_training_job
+ job_uuid = self._scheduler.schedule(_JOB_TYPE_SUPERVISED_FINE_TUNE, job_uuid, handler)
+ return PostTrainingJob(job_uuid=job_uuid)
async def preference_optimize(
self,
@@ -114,19 +126,55 @@ class TorchtunePostTrainingImpl:
) -> PostTrainingJob: ...
async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
- return ListPostTrainingJobsResponse(data=[PostTrainingJob(job_uuid=uuid_) for uuid_ in self.jobs])
+ return ListPostTrainingJobsResponse(
+ data=[PostTrainingJob(job_uuid=job.id) for job in self._scheduler.get_jobs()]
+ )
+
+ @staticmethod
+ def _get_artifacts_metadata_by_type(job, artifact_type):
+ return [artifact.metadata for artifact in job.artifacts if artifact.type == artifact_type]
+
+ @classmethod
+ def _get_checkpoints(cls, job):
+ return cls._get_artifacts_metadata_by_type(job, TrainingArtifactType.CHECKPOINT.value)
+
+ @classmethod
+ def _get_resources_allocated(cls, job):
+ data = cls._get_artifacts_metadata_by_type(job, TrainingArtifactType.RESOURCES_STATS.value)
+ return data[0] if data else None
@webmethod(route="/post-training/job/status")
async def get_training_job_status(self, job_uuid: str) -> Optional[PostTrainingJobStatusResponse]:
- return self.jobs.get(job_uuid, None)
+ job = self._scheduler.get_job(job_uuid)
+
+ match job.status:
+ # TODO: Add support for other statuses to API
+ case SchedulerJobStatus.new | SchedulerJobStatus.scheduled:
+ status = JobStatus.scheduled
+ case SchedulerJobStatus.running:
+ status = JobStatus.in_progress
+ case SchedulerJobStatus.completed:
+ status = JobStatus.completed
+ case SchedulerJobStatus.failed:
+ status = JobStatus.failed
+ case _:
+ raise NotImplementedError()
+
+ return PostTrainingJobStatusResponse(
+ job_uuid=job_uuid,
+ status=status,
+ scheduled_at=job.scheduled_at,
+ started_at=job.started_at,
+ completed_at=job.completed_at,
+ checkpoints=self._get_checkpoints(job),
+ resources_allocated=self._get_resources_allocated(job),
+ )
@webmethod(route="/post-training/job/cancel")
async def cancel_training_job(self, job_uuid: str) -> None:
- raise NotImplementedError("Job cancel is not implemented yet")
+ self._scheduler.cancel(job_uuid)
@webmethod(route="/post-training/job/artifacts")
async def get_training_job_artifacts(self, job_uuid: str) -> Optional[PostTrainingJobArtifactsResponse]:
- if job_uuid in self.checkpoints_dict:
- checkpoints = self.checkpoints_dict.get(job_uuid, [])
- return PostTrainingJobArtifactsResponse(job_uuid=job_uuid, checkpoints=checkpoints)
- return None
+ job = self._scheduler.get_job(job_uuid)
+ return PostTrainingJobArtifactsResponse(job_uuid=job_uuid, checkpoints=self._get_checkpoints(job))
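
A minimal sketch (using stand-in objects, not the real Scheduler job type) of how checkpoints and resource stats are recovered from the collected artifacts:

```python
from types import SimpleNamespace

CHECKPOINT = "checkpoint"
RESOURCES_STATS = "resources_stats"

job = SimpleNamespace(
    artifacts=[
        SimpleNamespace(type=CHECKPOINT, metadata={"identifier": "epoch-1", "path": "/tmp/ckpt-1"}),
        SimpleNamespace(type=RESOURCES_STATS, metadata={"gpu_memory_gb": 24}),
    ]
)


def artifacts_metadata_by_type(job, artifact_type):
    # Same filter as TorchtunePostTrainingImpl._get_artifacts_metadata_by_type
    return [artifact.metadata for artifact in job.artifacts if artifact.type == artifact_type]


print(artifacts_metadata_by_type(job, CHECKPOINT))       # [{'identifier': 'epoch-1', 'path': '/tmp/ckpt-1'}]
print(artifacts_metadata_by_type(job, RESOURCES_STATS))  # [{'gpu_memory_gb': 24}]
```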
diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/llama_stack/providers/remote/inference/bedrock/bedrock.py
index 0a485da8f..f8dbcf31a 100644
--- a/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -36,10 +36,10 @@ from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
)
from llama_stack.providers.utils.inference.openai_compat import (
- OpenAIChatCompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
OpenAICompatCompletionChoice,
OpenAICompatCompletionResponse,
- OpenAICompletionUnsupportedMixin,
+ OpenAICompletionToLlamaStackMixin,
get_sampling_strategy_options,
process_chat_completion_response,
process_chat_completion_stream_response,
@@ -56,8 +56,8 @@ from .models import MODEL_ENTRIES
class BedrockInferenceAdapter(
ModelRegistryHelper,
Inference,
- OpenAIChatCompletionUnsupportedMixin,
- OpenAICompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
+ OpenAICompletionToLlamaStackMixin,
):
def __init__(self, config: BedrockConfig) -> None:
ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py
index 5e0a5b484..3156601be 100644
--- a/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -34,8 +34,8 @@ from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
)
from llama_stack.providers.utils.inference.openai_compat import (
- OpenAIChatCompletionUnsupportedMixin,
- OpenAICompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
+ OpenAICompletionToLlamaStackMixin,
get_sampling_options,
process_chat_completion_response,
process_chat_completion_stream_response,
@@ -54,8 +54,8 @@ from .models import MODEL_ENTRIES
class CerebrasInferenceAdapter(
ModelRegistryHelper,
Inference,
- OpenAIChatCompletionUnsupportedMixin,
- OpenAICompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
+ OpenAICompletionToLlamaStackMixin,
):
def __init__(self, config: CerebrasImplConfig) -> None:
ModelRegistryHelper.__init__(
diff --git a/llama_stack/providers/remote/inference/databricks/databricks.py b/llama_stack/providers/remote/inference/databricks/databricks.py
index a10878b27..27d96eb7d 100644
--- a/llama_stack/providers/remote/inference/databricks/databricks.py
+++ b/llama_stack/providers/remote/inference/databricks/databricks.py
@@ -34,8 +34,8 @@ from llama_stack.providers.utils.inference.model_registry import (
build_hf_repo_model_entry,
)
from llama_stack.providers.utils.inference.openai_compat import (
- OpenAIChatCompletionUnsupportedMixin,
- OpenAICompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
+ OpenAICompletionToLlamaStackMixin,
get_sampling_options,
process_chat_completion_response,
process_chat_completion_stream_response,
@@ -61,8 +61,8 @@ model_entries = [
class DatabricksInferenceAdapter(
ModelRegistryHelper,
Inference,
- OpenAIChatCompletionUnsupportedMixin,
- OpenAICompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
+ OpenAICompletionToLlamaStackMixin,
):
def __init__(self, config: DatabricksImplConfig) -> None:
ModelRegistryHelper.__init__(self, model_entries=model_entries)
diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py
index b59e9f2cb..48c163c87 100644
--- a/llama_stack/providers/remote/inference/fireworks/fireworks.py
+++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py
@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from typing import Any, AsyncGenerator, Dict, List, Optional, Union
+from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union
from fireworks.client import Fireworks
from openai import AsyncOpenAI
@@ -32,13 +32,20 @@ from llama_stack.apis.inference import (
ToolDefinition,
ToolPromptFormat,
)
-from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam
+from llama_stack.apis.inference.inference import (
+ OpenAIChatCompletion,
+ OpenAIChatCompletionChunk,
+ OpenAICompletion,
+ OpenAIMessageParam,
+ OpenAIResponseFormatParam,
+)
from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
)
from llama_stack.providers.utils.inference.openai_compat import (
+ OpenAIChatCompletionToLlamaStackMixin,
convert_message_to_openai_dict,
get_sampling_options,
prepare_openai_completion_params,
@@ -301,6 +308,11 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
prompt_logprobs: Optional[int] = None,
) -> OpenAICompletion:
model_obj = await self.model_store.get_model(model)
+
+ # Fireworks always prepends a BOS token, so strip any leading BOS from the prompt
+ if isinstance(prompt, str) and prompt.startswith("<|begin_of_text|>"):
+ prompt = prompt[len("<|begin_of_text|>") :]
+
params = await prepare_openai_completion_params(
model=model_obj.provider_resource_id,
prompt=prompt,
@@ -320,6 +332,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
top_p=top_p,
user=user,
)
+
return await self._get_openai_client().completions.create(**params)
async def openai_chat_completion(
@@ -336,7 +349,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
n: Optional[int] = None,
parallel_tool_calls: Optional[bool] = None,
presence_penalty: Optional[float] = None,
- response_format: Optional[Dict[str, str]] = None,
+ response_format: Optional[OpenAIResponseFormatParam] = None,
seed: Optional[int] = None,
stop: Optional[Union[str, List[str]]] = None,
stream: Optional[bool] = None,
@@ -347,10 +360,9 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
user: Optional[str] = None,
- ) -> OpenAIChatCompletion:
+ ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
model_obj = await self.model_store.get_model(model)
params = await prepare_openai_completion_params(
- model=model_obj.provider_resource_id,
messages=messages,
frequency_penalty=frequency_penalty,
function_call=function_call,
@@ -374,4 +386,12 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
top_p=top_p,
user=user,
)
- return await self._get_openai_client().chat.completions.create(**params)
+
+ # Divert Llama models through the Llama Stack inference APIs because
+ # Fireworks' OpenAI-compatible chat completions API does not handle
+ # tool calls properly.
+ llama_model = self.get_llama_model(model_obj.provider_resource_id)
+ if llama_model:
+ return await OpenAIChatCompletionToLlamaStackMixin.openai_chat_completion(self, model=model, **params)
+
+ return await self._get_openai_client().chat.completions.create(model=model_obj.provider_resource_id, **params)
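
The BOS handling above in isolation (hypothetical prompt value):

```python
prompt = "<|begin_of_text|>Tell me a joke"

# Fireworks prepends its own BOS token, so a leading one in the prompt would be doubled.
if prompt.startswith("<|begin_of_text|>"):
    prompt = prompt[len("<|begin_of_text|>") :]

print(prompt)  # "Tell me a joke"
```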
diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py
index c8789434f..f3f14e9af 100644
--- a/llama_stack/providers/remote/inference/groq/groq.py
+++ b/llama_stack/providers/remote/inference/groq/groq.py
@@ -4,8 +4,24 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+from typing import Any, AsyncIterator, Dict, List, Optional, Union
+
+from openai import AsyncOpenAI
+
+from llama_stack.apis.inference.inference import (
+ OpenAIChatCompletion,
+ OpenAIChatCompletionChunk,
+ OpenAIChoiceDelta,
+ OpenAIChunkChoice,
+ OpenAIMessageParam,
+ OpenAIResponseFormatParam,
+ OpenAISystemMessageParam,
+)
from llama_stack.providers.remote.inference.groq.config import GroqConfig
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+from llama_stack.providers.utils.inference.openai_compat import (
+ prepare_openai_completion_params,
+)
from .models import MODEL_ENTRIES
@@ -21,9 +37,129 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin):
provider_data_api_key_field="groq_api_key",
)
self.config = config
+ self._openai_client = None
async def initialize(self):
await super().initialize()
async def shutdown(self):
await super().shutdown()
+ if self._openai_client:
+ await self._openai_client.close()
+ self._openai_client = None
+
+ def _get_openai_client(self) -> AsyncOpenAI:
+ if not self._openai_client:
+ self._openai_client = AsyncOpenAI(
+ base_url=f"{self.config.url}/openai/v1",
+ api_key=self.config.api_key,
+ )
+ return self._openai_client
+
+ async def openai_chat_completion(
+ self,
+ model: str,
+ messages: List[OpenAIMessageParam],
+ frequency_penalty: Optional[float] = None,
+ function_call: Optional[Union[str, Dict[str, Any]]] = None,
+ functions: Optional[List[Dict[str, Any]]] = None,
+ logit_bias: Optional[Dict[str, float]] = None,
+ logprobs: Optional[bool] = None,
+ max_completion_tokens: Optional[int] = None,
+ max_tokens: Optional[int] = None,
+ n: Optional[int] = None,
+ parallel_tool_calls: Optional[bool] = None,
+ presence_penalty: Optional[float] = None,
+ response_format: Optional[OpenAIResponseFormatParam] = None,
+ seed: Optional[int] = None,
+ stop: Optional[Union[str, List[str]]] = None,
+ stream: Optional[bool] = None,
+ stream_options: Optional[Dict[str, Any]] = None,
+ temperature: Optional[float] = None,
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+ tools: Optional[List[Dict[str, Any]]] = None,
+ top_logprobs: Optional[int] = None,
+ top_p: Optional[float] = None,
+ user: Optional[str] = None,
+ ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
+ model_obj = await self.model_store.get_model(model)
+
+ # Groq does not support json_schema response format, so we need to convert it to json_object
+ if response_format and response_format.type == "json_schema":
+ response_format.type = "json_object"
+ schema = response_format.json_schema.get("schema", {})
+ response_format.json_schema = None
+ json_instructions = f"\nYour response should be a JSON object that matches the following schema: {schema}"
+ if messages and messages[0].role == "system":
+ messages[0].content = messages[0].content + json_instructions
+ else:
+ messages.insert(0, OpenAISystemMessageParam(content=json_instructions))
+
+ # Groq returns a 400 error if tools are provided but none are called
+ # So, set tool_choice to "required" to attempt to force a call
+ if tools and (not tool_choice or tool_choice == "auto"):
+ tool_choice = "required"
+
+ params = await prepare_openai_completion_params(
+ model=model_obj.provider_resource_id.replace("groq/", ""),
+ messages=messages,
+ frequency_penalty=frequency_penalty,
+ function_call=function_call,
+ functions=functions,
+ logit_bias=logit_bias,
+ logprobs=logprobs,
+ max_completion_tokens=max_completion_tokens,
+ max_tokens=max_tokens,
+ n=n,
+ parallel_tool_calls=parallel_tool_calls,
+ presence_penalty=presence_penalty,
+ response_format=response_format,
+ seed=seed,
+ stop=stop,
+ stream=stream,
+ stream_options=stream_options,
+ temperature=temperature,
+ tool_choice=tool_choice,
+ tools=tools,
+ top_logprobs=top_logprobs,
+ top_p=top_p,
+ user=user,
+ )
+
+ # Groq does not support streaming requests that set response_format
+ fake_stream = False
+ if stream and response_format:
+ params["stream"] = False
+ fake_stream = True
+
+ response = await self._get_openai_client().chat.completions.create(**params)
+
+ if fake_stream:
+ chunk_choices = []
+ for choice in response.choices:
+ delta = OpenAIChoiceDelta(
+ content=choice.message.content,
+ role=choice.message.role,
+ tool_calls=choice.message.tool_calls,
+ )
+ chunk_choice = OpenAIChunkChoice(
+ delta=delta,
+ finish_reason=choice.finish_reason,
+ index=choice.index,
+ logprobs=None,
+ )
+ chunk_choices.append(chunk_choice)
+ chunk = OpenAIChatCompletionChunk(
+ id=response.id,
+ choices=chunk_choices,
+ object="chat.completion.chunk",
+ created=response.created,
+ model=response.model,
+ )
+
+ async def _fake_stream_generator():
+ yield chunk
+
+ return _fake_stream_generator()
+ else:
+ return response
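
For callers the fake-stream path is transparent: when `stream=True` is combined with a `response_format`, the adapter returns an async generator that yields the single materialized chunk. A stand-alone sketch of the same wrapping idea (stand-in response value, not a real OpenAI chunk):

```python
import asyncio


async def as_single_chunk_stream(response):
    # Wrap an already materialized response so `async for` consumers still work.
    yield response


async def main():
    async for chunk in as_single_chunk_stream({"id": "chatcmpl-123", "object": "chat.completion.chunk"}):
        print(chunk)


asyncio.run(main())
```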
diff --git a/llama_stack/providers/remote/inference/groq/models.py b/llama_stack/providers/remote/inference/groq/models.py
index d0c10ca62..0b4b81cfe 100644
--- a/llama_stack/providers/remote/inference/groq/models.py
+++ b/llama_stack/providers/remote/inference/groq/models.py
@@ -39,8 +39,16 @@ MODEL_ENTRIES = [
"groq/llama-4-scout-17b-16e-instruct",
CoreModelId.llama4_scout_17b_16e_instruct.value,
),
+ build_hf_repo_model_entry(
+ "groq/meta-llama/llama-4-scout-17b-16e-instruct",
+ CoreModelId.llama4_scout_17b_16e_instruct.value,
+ ),
build_hf_repo_model_entry(
"groq/llama-4-maverick-17b-128e-instruct",
CoreModelId.llama4_maverick_17b_128e_instruct.value,
),
+ build_hf_repo_model_entry(
+ "groq/meta-llama/llama-4-maverick-17b-128e-instruct",
+ CoreModelId.llama4_maverick_17b_128e_instruct.value,
+ ),
]
diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py
index c13be2e25..c683bbe2b 100644
--- a/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -34,15 +34,18 @@ from llama_stack.apis.inference import (
ToolChoice,
ToolConfig,
)
-from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam
from llama_stack.apis.models import Model, ModelType
-from llama_stack.models.llama.datatypes import (
- ToolDefinition,
- ToolPromptFormat,
-)
from llama_stack.providers.utils.inference import (
ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR,
)
+from llama_stack.apis.inference.inference import (
+ OpenAIChatCompletion,
+ OpenAIChatCompletionChunk,
+ OpenAICompletion,
+ OpenAIMessageParam,
+ OpenAIResponseFormatParam,
+)
+from llama_stack.models.llama.datatypes import ToolPromptFormat
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
)
@@ -335,7 +338,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
n: Optional[int] = None,
parallel_tool_calls: Optional[bool] = None,
presence_penalty: Optional[float] = None,
- response_format: Optional[Dict[str, str]] = None,
+ response_format: Optional[OpenAIResponseFormatParam] = None,
seed: Optional[int] = None,
stop: Optional[Union[str, List[str]]] = None,
stream: Optional[bool] = None,
@@ -346,7 +349,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
user: Optional[str] = None,
- ) -> OpenAIChatCompletion:
+ ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
provider_model_id = self.get_provider_model_id(model)
params = await prepare_openai_completion_params(
diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py
index 33b48af46..cdfe7b568 100644
--- a/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -5,7 +5,7 @@
# the root directory of this source tree.
-from typing import Any, AsyncGenerator, Dict, List, Optional, Union
+from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union
import httpx
from ollama import AsyncClient
@@ -39,10 +39,20 @@ from llama_stack.apis.inference import (
ToolDefinition,
ToolPromptFormat,
)
-from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam
+from llama_stack.apis.inference.inference import (
+ OpenAIChatCompletion,
+ OpenAIChatCompletionChunk,
+ OpenAICompletion,
+ OpenAIMessageParam,
+ OpenAIResponseFormatParam,
+)
from llama_stack.apis.models import Model, ModelType
from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ModelsProtocolPrivate
+from llama_stack.providers.datatypes import (
+ HealthResponse,
+ HealthStatus,
+ ModelsProtocolPrivate,
+)
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
)
@@ -87,8 +97,19 @@ class OllamaInferenceAdapter(
async def initialize(self) -> None:
logger.info(f"checking connectivity to Ollama at `{self.url}`...")
+ await self.health()
+
+ async def health(self) -> HealthResponse:
+ """
+ Performs a health check by verifying connectivity to the Ollama server.
+ This method is used by initialize() and the Provider API to verify that the service is running
+ correctly.
+ Returns:
+ HealthResponse: A dictionary containing the health status.
+ """
try:
await self.client.ps()
+ return HealthResponse(status=HealthStatus.OK)
except httpx.ConnectError as e:
raise RuntimeError(
"Ollama Server is not running, start it using `ollama serve` in a separate terminal"
@@ -322,6 +343,12 @@ class OllamaInferenceAdapter(
response = await self.client.list()
available_models = [m["model"] for m in response["models"]]
if model.provider_resource_id not in available_models:
+ available_models_latest = [m["model"].split(":latest")[0] for m in response["models"]]
+ if model.provider_resource_id in available_models_latest:
+ logger.warning(
+ f"Imprecise provider resource id was used but 'latest' is available in Ollama - using '{model.provider_resource_id}:latest'"
+ )
+ return model
raise ValueError(
f"Model '{model.provider_resource_id}' is not available in Ollama. Available models: {', '.join(available_models)}"
)
@@ -393,7 +420,7 @@ class OllamaInferenceAdapter(
n: Optional[int] = None,
parallel_tool_calls: Optional[bool] = None,
presence_penalty: Optional[float] = None,
- response_format: Optional[Dict[str, str]] = None,
+ response_format: Optional[OpenAIResponseFormatParam] = None,
seed: Optional[int] = None,
stop: Optional[Union[str, List[str]]] = None,
stream: Optional[bool] = None,
@@ -404,7 +431,7 @@ class OllamaInferenceAdapter(
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
user: Optional[str] = None,
- ) -> OpenAIChatCompletion:
+ ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
model_obj = await self._get_model(model)
params = {
k: v
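
The `:latest` fallback above in isolation (hypothetical Ollama model listing):

```python
available_models = ["llama3.2:3b-instruct-fp16", "llama3.2:latest"]

# Strip a trailing ":latest" so an imprecise provider_resource_id such as "llama3.2" still matches.
available_models_latest = [m.split(":latest")[0] for m in available_models]

print("llama3.2" in available_models)         # False -> previously raised ValueError
print("llama3.2" in available_models_latest)  # True  -> now accepted with a warning
```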
diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py
index 0eb38c395..af05320b0 100644
--- a/llama_stack/providers/remote/inference/passthrough/passthrough.py
+++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py
@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from typing import Any, AsyncGenerator, Dict, List, Optional, Union
+from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union
from llama_stack_client import AsyncLlamaStackClient
@@ -26,7 +26,13 @@ from llama_stack.apis.inference import (
ToolDefinition,
ToolPromptFormat,
)
-from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam
+from llama_stack.apis.inference.inference import (
+ OpenAIChatCompletion,
+ OpenAIChatCompletionChunk,
+ OpenAICompletion,
+ OpenAIMessageParam,
+ OpenAIResponseFormatParam,
+)
from llama_stack.apis.models import Model
from llama_stack.distribution.library_client import convert_pydantic_to_json_value, convert_to_pydantic
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
@@ -266,7 +272,7 @@ class PassthroughInferenceAdapter(Inference):
n: Optional[int] = None,
parallel_tool_calls: Optional[bool] = None,
presence_penalty: Optional[float] = None,
- response_format: Optional[Dict[str, str]] = None,
+ response_format: Optional[OpenAIResponseFormatParam] = None,
seed: Optional[int] = None,
stop: Optional[Union[str, List[str]]] = None,
stream: Optional[bool] = None,
@@ -277,7 +283,7 @@ class PassthroughInferenceAdapter(Inference):
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
user: Optional[str] = None,
- ) -> OpenAIChatCompletion:
+ ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
client = self._get_client()
model_obj = await self.model_store.get_model(model)
diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/llama_stack/providers/remote/inference/runpod/runpod.py
index 878460122..72cbead9b 100644
--- a/llama_stack/providers/remote/inference/runpod/runpod.py
+++ b/llama_stack/providers/remote/inference/runpod/runpod.py
@@ -12,8 +12,8 @@ from llama_stack.apis.inference import * # noqa: F403
# from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
from llama_stack.providers.utils.inference.openai_compat import (
- OpenAIChatCompletionUnsupportedMixin,
- OpenAICompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
+ OpenAICompletionToLlamaStackMixin,
get_sampling_options,
process_chat_completion_response,
process_chat_completion_stream_response,
@@ -43,8 +43,8 @@ RUNPOD_SUPPORTED_MODELS = {
class RunpodInferenceAdapter(
ModelRegistryHelper,
Inference,
- OpenAIChatCompletionUnsupportedMixin,
- OpenAICompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
+ OpenAICompletionToLlamaStackMixin,
):
def __init__(self, config: RunpodImplConfig) -> None:
ModelRegistryHelper.__init__(self, stack_to_provider_models_map=RUNPOD_SUPPORTED_MODELS)
diff --git a/llama_stack/providers/remote/inference/sambanova/sambanova.py b/llama_stack/providers/remote/inference/sambanova/sambanova.py
index c503657eb..1665e72b8 100644
--- a/llama_stack/providers/remote/inference/sambanova/sambanova.py
+++ b/llama_stack/providers/remote/inference/sambanova/sambanova.py
@@ -42,8 +42,8 @@ from llama_stack.apis.inference import (
)
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
from llama_stack.providers.utils.inference.openai_compat import (
- OpenAIChatCompletionUnsupportedMixin,
- OpenAICompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
+ OpenAICompletionToLlamaStackMixin,
process_chat_completion_stream_response,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
@@ -57,8 +57,8 @@ from .models import MODEL_ENTRIES
class SambaNovaInferenceAdapter(
ModelRegistryHelper,
Inference,
- OpenAIChatCompletionUnsupportedMixin,
- OpenAICompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
+ OpenAICompletionToLlamaStackMixin,
):
def __init__(self, config: SambaNovaImplConfig) -> None:
ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py
index 8f5b5e3cc..4ee386a15 100644
--- a/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -40,10 +40,10 @@ from llama_stack.providers.utils.inference.model_registry import (
build_hf_repo_model_entry,
)
from llama_stack.providers.utils.inference.openai_compat import (
- OpenAIChatCompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
OpenAICompatCompletionChoice,
OpenAICompatCompletionResponse,
- OpenAICompletionUnsupportedMixin,
+ OpenAICompletionToLlamaStackMixin,
get_sampling_options,
process_chat_completion_response,
process_chat_completion_stream_response,
@@ -73,8 +73,8 @@ def build_hf_repo_model_entries():
class _HfAdapter(
Inference,
- OpenAIChatCompletionUnsupportedMixin,
- OpenAICompletionUnsupportedMixin,
+ OpenAIChatCompletionToLlamaStackMixin,
+ OpenAICompletionToLlamaStackMixin,
ModelsProtocolPrivate,
):
client: AsyncInferenceClient
diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py
index 1615b8cd1..001e6aac4 100644
--- a/llama_stack/providers/remote/inference/together/together.py
+++ b/llama_stack/providers/remote/inference/together/together.py
@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from typing import Any, AsyncGenerator, Dict, List, Optional, Union
+from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union
from openai import AsyncOpenAI
from together import AsyncTogether
@@ -31,7 +31,13 @@ from llama_stack.apis.inference import (
ToolDefinition,
ToolPromptFormat,
)
-from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam
+from llama_stack.apis.inference.inference import (
+ OpenAIChatCompletion,
+ OpenAIChatCompletionChunk,
+ OpenAICompletion,
+ OpenAIMessageParam,
+ OpenAIResponseFormatParam,
+)
from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
@@ -315,7 +321,7 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi
n: Optional[int] = None,
parallel_tool_calls: Optional[bool] = None,
presence_penalty: Optional[float] = None,
- response_format: Optional[Dict[str, str]] = None,
+ response_format: Optional[OpenAIResponseFormatParam] = None,
seed: Optional[int] = None,
stop: Optional[Union[str, List[str]]] = None,
stream: Optional[bool] = None,
@@ -326,7 +332,7 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
user: Optional[str] = None,
- ) -> OpenAIChatCompletion:
+ ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
model_obj = await self.model_store.get_model(model)
params = await prepare_openai_completion_params(
model=model_obj.provider_resource_id,
@@ -353,4 +359,26 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi
top_p=top_p,
user=user,
)
+ if params.get("stream", True):
+ return self._stream_openai_chat_completion(params)
return await self._get_openai_client().chat.completions.create(**params) # type: ignore
+
+ async def _stream_openai_chat_completion(self, params: dict) -> AsyncGenerator:
+ # together.ai sometimes adds usage data to the stream even when include_usage is False.
+ # This causes an unexpected final chunk with an empty choices array to be sent
+ # to clients that may not handle it gracefully.
+ include_usage = False
+ if params.get("stream_options", None):
+ include_usage = params["stream_options"].get("include_usage", False)
+ stream = await self._get_openai_client().chat.completions.create(**params)
+
+ seen_finish_reason = False
+ async for chunk in stream:
+ # Discard the trailing usage chunk (empty choices list) that the caller never asked for
+ if not include_usage and seen_finish_reason and len(chunk.choices) == 0:
+ break
+ yield chunk
+ for choice in chunk.choices:
+ if choice.finish_reason:
+ seen_finish_reason = True
+ break
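
A self-contained sketch of the chunk filtering above, using stand-in objects in place of real OpenAI stream chunks:

```python
import asyncio
from types import SimpleNamespace


async def upstream():
    yield SimpleNamespace(choices=[SimpleNamespace(finish_reason=None)])
    yield SimpleNamespace(choices=[SimpleNamespace(finish_reason="stop")])
    # Trailing usage chunk with an empty choices list that the caller never asked for
    yield SimpleNamespace(choices=[])


async def filtered(stream, include_usage=False):
    seen_finish_reason = False
    async for chunk in stream:
        if not include_usage and seen_finish_reason and len(chunk.choices) == 0:
            break
        yield chunk
        for choice in chunk.choices:
            if choice.finish_reason:
                seen_finish_reason = True
                break


async def main():
    chunks = [chunk async for chunk in filtered(upstream())]
    print(len(chunks))  # 2 -- the unrequested trailing chunk is dropped


asyncio.run(main())
```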
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 0044d2e75..d141afa86 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -5,7 +5,7 @@
# the root directory of this source tree.
import json
import logging
-from typing import Any, AsyncGenerator, Dict, List, Optional, Union
+from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union
import httpx
from openai import AsyncOpenAI
@@ -45,7 +45,12 @@ from llama_stack.apis.inference import (
ToolDefinition,
ToolPromptFormat,
)
-from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam
+from llama_stack.apis.inference.inference import (
+ OpenAIChatCompletion,
+ OpenAICompletion,
+ OpenAIMessageParam,
+ OpenAIResponseFormatParam,
+)
from llama_stack.apis.models import Model, ModelType
from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall
from llama_stack.models.llama.sku_list import all_registered_models
@@ -369,7 +374,8 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
options["max_tokens"] = self.config.max_tokens
input_dict: dict[str, Any] = {}
- if isinstance(request, ChatCompletionRequest) and request.tools is not None:
+ # Only include the 'tools' param if there are any tools; sending an empty list can break vLLM.
+ if isinstance(request, ChatCompletionRequest) and request.tools:
input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)}
if isinstance(request, ChatCompletionRequest):
@@ -487,7 +493,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
n: Optional[int] = None,
parallel_tool_calls: Optional[bool] = None,
presence_penalty: Optional[float] = None,
- response_format: Optional[Dict[str, str]] = None,
+ response_format: Optional[OpenAIResponseFormatParam] = None,
seed: Optional[int] = None,
stop: Optional[Union[str, List[str]]] = None,
stream: Optional[bool] = None,
@@ -498,7 +504,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
user: Optional[str] = None,
- ) -> OpenAIChatCompletion:
+ ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
model_obj = await self._get_model(model)
params = await prepare_openai_completion_params(
model=model_obj.provider_resource_id,
diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py
index cd0f4ec67..efe7031f5 100644
--- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py
+++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py
@@ -30,7 +30,13 @@ from llama_stack.apis.inference import (
ToolDefinition,
ToolPromptFormat,
)
-from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAIMessageParam
+from llama_stack.apis.inference.inference import (
+ OpenAIChatCompletion,
+ OpenAIChatCompletionChunk,
+ OpenAICompletion,
+ OpenAIMessageParam,
+ OpenAIResponseFormatParam,
+)
from llama_stack.apis.models.models import Model
from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger
@@ -270,7 +276,7 @@ class LiteLLMOpenAIMixin(
guided_choice: Optional[List[str]] = None,
prompt_logprobs: Optional[int] = None,
) -> OpenAICompletion:
- model_obj = await self._get_model(model)
+ model_obj = await self.model_store.get_model(model)
params = await prepare_openai_completion_params(
model=model_obj.provider_resource_id,
prompt=prompt,
@@ -292,7 +298,7 @@ class LiteLLMOpenAIMixin(
guided_choice=guided_choice,
prompt_logprobs=prompt_logprobs,
)
- return litellm.text_completion(**params)
+ return await litellm.atext_completion(**params)
async def openai_chat_completion(
self,
@@ -308,7 +314,7 @@ class LiteLLMOpenAIMixin(
n: Optional[int] = None,
parallel_tool_calls: Optional[bool] = None,
presence_penalty: Optional[float] = None,
- response_format: Optional[Dict[str, str]] = None,
+ response_format: Optional[OpenAIResponseFormatParam] = None,
seed: Optional[int] = None,
stop: Optional[Union[str, List[str]]] = None,
stream: Optional[bool] = None,
@@ -319,8 +325,8 @@ class LiteLLMOpenAIMixin(
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
user: Optional[str] = None,
- ) -> OpenAIChatCompletion:
- model_obj = await self._get_model(model)
+ ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
+ model_obj = await self.model_store.get_model(model)
params = await prepare_openai_completion_params(
model=model_obj.provider_resource_id,
messages=messages,
@@ -346,7 +352,7 @@ class LiteLLMOpenAIMixin(
top_p=top_p,
user=user,
)
- return litellm.completion(**params)
+ return await litellm.acompletion(**params)
async def batch_completion(
self,
diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py
index f33cb4443..d98261abb 100644
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -8,7 +8,7 @@ import logging
import time
import uuid
import warnings
-from typing import Any, AsyncGenerator, Dict, Iterable, List, Optional, Union
+from typing import Any, AsyncGenerator, AsyncIterator, Awaitable, Dict, Iterable, List, Optional, Union
from openai import AsyncStream
from openai.types.chat import (
@@ -50,6 +50,18 @@ from openai.types.chat.chat_completion import (
from openai.types.chat.chat_completion import (
ChoiceLogprobs as OpenAIChoiceLogprobs, # same as chat_completion_chunk ChoiceLogprobs
)
+from openai.types.chat.chat_completion_chunk import (
+ Choice as OpenAIChatCompletionChunkChoice,
+)
+from openai.types.chat.chat_completion_chunk import (
+ ChoiceDelta as OpenAIChoiceDelta,
+)
+from openai.types.chat.chat_completion_chunk import (
+ ChoiceDeltaToolCall as OpenAIChoiceDeltaToolCall,
+)
+from openai.types.chat.chat_completion_chunk import (
+ ChoiceDeltaToolCallFunction as OpenAIChoiceDeltaToolCallFunction,
+)
from openai.types.chat.chat_completion_content_part_image_param import (
ImageURL as OpenAIImageURL,
)
@@ -59,6 +71,7 @@ from openai.types.chat.chat_completion_message_tool_call_param import (
from pydantic import BaseModel
from llama_stack.apis.common.content_types import (
+ URL,
ImageContentItem,
InterleavedContent,
TextContentItem,
@@ -85,12 +98,24 @@ from llama_stack.apis.inference import (
TopPSamplingStrategy,
UserMessage,
)
-from llama_stack.apis.inference.inference import OpenAIChatCompletion, OpenAICompletion, OpenAICompletionChoice
+from llama_stack.apis.inference.inference import (
+ JsonSchemaResponseFormat,
+ OpenAIChatCompletion,
+ OpenAICompletion,
+ OpenAICompletionChoice,
+ OpenAIMessageParam,
+ OpenAIResponseFormatParam,
+ ToolConfig,
+)
+from llama_stack.apis.inference.inference import (
+ OpenAIChoice as OpenAIChatCompletionChoice,
+)
from llama_stack.models.llama.datatypes import (
BuiltinTool,
StopReason,
ToolCall,
ToolDefinition,
+ ToolParamDefinition,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
convert_image_content_to_url,
@@ -751,6 +776,17 @@ def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict:
return out
+def _convert_stop_reason_to_openai_finish_reason(stop_reason: StopReason) -> str:
+ """
+ Convert a StopReason to an OpenAI chat completion finish_reason.
+ """
+ return {
+ StopReason.end_of_turn: "stop",
+ StopReason.end_of_message: "tool_calls",
+ StopReason.out_of_tokens: "length",
+ }.get(stop_reason, "stop")
+
+
def _convert_openai_finish_reason(finish_reason: str) -> StopReason:
"""
Convert an OpenAI chat completion finish_reason to a StopReason.
@@ -776,6 +812,56 @@ def _convert_openai_finish_reason(finish_reason: str) -> StopReason:
}.get(finish_reason, StopReason.end_of_turn)
+def _convert_openai_request_tool_config(tool_choice: Optional[Union[str, Dict[str, Any]]] = None) -> ToolConfig:
+ tool_config = ToolConfig()
+ if tool_choice:
+ tool_config.tool_choice = tool_choice
+ return tool_config
+
+
+def _convert_openai_request_tools(tools: Optional[List[Dict[str, Any]]] = None) -> List[ToolDefinition]:
+ lls_tools = []
+ if not tools:
+ return lls_tools
+
+ for tool in tools:
+ tool_fn = tool.get("function", {})
+ tool_name = tool_fn.get("name", None)
+ tool_desc = tool_fn.get("description", None)
+
+ tool_params = tool_fn.get("parameters", None)
+ lls_tool_params = {}
+ if tool_params is not None:
+ tool_param_properties = tool_params.get("properties", {})
+ for tool_param_key, tool_param_value in tool_param_properties.items():
+ tool_param_def = ToolParamDefinition(
+ param_type=tool_param_value.get("type", None),
+ description=tool_param_value.get("description", None),
+ )
+ lls_tool_params[tool_param_key] = tool_param_def
+
+ lls_tool = ToolDefinition(
+ tool_name=tool_name,
+ description=tool_desc,
+ parameters=lls_tool_params,
+ )
+ lls_tools.append(lls_tool)
+ return lls_tools
+
+
+def _convert_openai_request_response_format(response_format: OpenAIResponseFormatParam = None):
+ if not response_format:
+ return None
+ # response_format can be a dict or a pydantic model
+ response_format = dict(response_format)
+ if response_format.get("type", "") == "json_schema":
+ return JsonSchemaResponseFormat(
+ type="json_schema",
+ json_schema=response_format.get("json_schema", {}).get("schema", ""),
+ )
+ return None
+
+
def _convert_openai_tool_calls(
tool_calls: List[OpenAIChatCompletionMessageToolCall],
) -> List[ToolCall]:
@@ -871,6 +957,40 @@ def _convert_openai_sampling_params(
return sampling_params
+def _convert_openai_request_messages(messages: List[OpenAIMessageParam]):
+ # Llama Stack messages and OpenAI messages are similar, but not identical.
+ lls_messages = []
+ for message in messages:
+ lls_message = dict(message)
+
+ # Llama Stack expects `call_id` but OpenAI uses `tool_call_id`
+ tool_call_id = lls_message.pop("tool_call_id", None)
+ if tool_call_id:
+ lls_message["call_id"] = tool_call_id
+
+ content = lls_message.get("content", None)
+ if isinstance(content, list):
+ lls_content = []
+ for item in content:
+                # items can be either pydantic models or dicts here...
+ item = dict(item)
+ if item.get("type", "") == "image_url":
+ lls_item = ImageContentItem(
+ type="image",
+ image=URL(uri=item.get("image_url", {}).get("url", "")),
+ )
+ elif item.get("type", "") == "text":
+ lls_item = TextContentItem(
+ type="text",
+ text=item.get("text", ""),
+ )
+ lls_content.append(lls_item)
+ lls_message["content"] = lls_content
+ lls_messages.append(lls_message)
+
+ return lls_messages
+
+
def convert_openai_chat_completion_choice(
choice: OpenAIChoice,
) -> ChatCompletionResponse:
@@ -1080,11 +1200,24 @@ async def convert_openai_chat_completion_stream(
async def prepare_openai_completion_params(**params):
- completion_params = {k: v for k, v in params.items() if v is not None}
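+    # Recursively dump pydantic models (including models nested inside lists and dicts) to plain dicts, dropping None fields, so every value is JSON-serializable for the OpenAI client.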
+ async def _prepare_value(value: Any) -> Any:
+ new_value = value
+ if isinstance(value, list):
+ new_value = [await _prepare_value(v) for v in value]
+ elif isinstance(value, dict):
+ new_value = {k: await _prepare_value(v) for k, v in value.items()}
+ elif isinstance(value, BaseModel):
+ new_value = value.model_dump(exclude_none=True)
+ return new_value
+
+ completion_params = {}
+ for k, v in params.items():
+ if v is not None:
+ completion_params[k] = await _prepare_value(v)
return completion_params
-class OpenAICompletionUnsupportedMixin:
+class OpenAICompletionToLlamaStackMixin:
async def openai_completion(
self,
model: str,
@@ -1122,6 +1255,7 @@ class OpenAICompletionUnsupportedMixin:
choices = []
# "n" is the number of completions to generate per prompt
+ n = n or 1
for _i in range(0, n):
# and we may have multiple prompts, if batching was used
@@ -1134,7 +1268,7 @@ class OpenAICompletionUnsupportedMixin:
index = len(choices)
text = result.content
- finish_reason = _convert_openai_finish_reason(result.stop_reason)
+ finish_reason = _convert_stop_reason_to_openai_finish_reason(result.stop_reason)
choice = OpenAICompletionChoice(
index=index,
@@ -1152,7 +1286,7 @@ class OpenAICompletionUnsupportedMixin:
)
-class OpenAIChatCompletionUnsupportedMixin:
+class OpenAIChatCompletionToLlamaStackMixin:
async def openai_chat_completion(
self,
model: str,
@@ -1167,7 +1301,7 @@ class OpenAIChatCompletionUnsupportedMixin:
n: Optional[int] = None,
parallel_tool_calls: Optional[bool] = None,
presence_penalty: Optional[float] = None,
- response_format: Optional[Dict[str, str]] = None,
+ response_format: Optional[OpenAIResponseFormatParam] = None,
seed: Optional[int] = None,
stop: Optional[Union[str, List[str]]] = None,
stream: Optional[bool] = None,
@@ -1178,5 +1312,103 @@ class OpenAIChatCompletionUnsupportedMixin:
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
user: Optional[str] = None,
+ ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
+ messages = _convert_openai_request_messages(messages)
+ response_format = _convert_openai_request_response_format(response_format)
+ sampling_params = _convert_openai_sampling_params(
+ max_tokens=max_tokens,
+ temperature=temperature,
+ top_p=top_p,
+ )
+ tool_config = _convert_openai_request_tool_config(tool_choice)
+ tools = _convert_openai_request_tools(tools)
+
+ outstanding_responses = []
+ # "n" is the number of completions to generate per prompt
+ n = n or 1
+ for _i in range(0, n):
+ response = self.chat_completion(
+ model_id=model,
+ messages=messages,
+ sampling_params=sampling_params,
+ response_format=response_format,
+ stream=stream,
+ tool_config=tool_config,
+ tools=tools,
+ )
+ outstanding_responses.append(response)
+
+ if stream:
+ return OpenAIChatCompletionToLlamaStackMixin._process_stream_response(self, model, outstanding_responses)
+
+ return await OpenAIChatCompletionToLlamaStackMixin._process_non_stream_response(
+ self, model, outstanding_responses
+ )
+
+ async def _process_stream_response(
+ self, model: str, outstanding_responses: List[Awaitable[AsyncIterator[ChatCompletionResponseStreamChunk]]]
+ ):
+ id = f"chatcmpl-{uuid.uuid4()}"
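+        # A single chat completion id is shared by every chunk yielded from all outstanding responses.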
+ for outstanding_response in outstanding_responses:
+ response = await outstanding_response
+ i = 0
+ async for chunk in response:
+ event = chunk.event
+ finish_reason = _convert_stop_reason_to_openai_finish_reason(event.stop_reason)
+
+ if isinstance(event.delta, TextDelta):
+ text_delta = event.delta.text
+ delta = OpenAIChoiceDelta(content=text_delta)
+ yield OpenAIChatCompletionChunk(
+ id=id,
+ choices=[OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)],
+ created=int(time.time()),
+ model=model,
+ object="chat.completion.chunk",
+ )
+ elif isinstance(event.delta, ToolCallDelta):
+ if event.delta.parse_status == ToolCallParseStatus.succeeded:
+ tool_call = event.delta.tool_call
+ openai_tool_call = OpenAIChoiceDeltaToolCall(
+ index=0,
+ id=tool_call.call_id,
+ function=OpenAIChoiceDeltaToolCallFunction(
+ name=tool_call.tool_name, arguments=tool_call.arguments_json
+ ),
+ )
+ delta = OpenAIChoiceDelta(tool_calls=[openai_tool_call])
+ yield OpenAIChatCompletionChunk(
+ id=id,
+ choices=[
+ OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)
+ ],
+ created=int(time.time()),
+ model=model,
+ object="chat.completion.chunk",
+ )
+ i = i + 1
+
+ async def _process_non_stream_response(
+ self, model: str, outstanding_responses: List[Awaitable[ChatCompletionResponse]]
) -> OpenAIChatCompletion:
- raise ValueError(f"{self.__class__.__name__} doesn't support openai chat completion")
+ choices = []
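+        # Each outstanding Llama Stack response becomes one choice in the aggregated OpenAI-style completion.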
+ for outstanding_response in outstanding_responses:
+ response = await outstanding_response
+ completion_message = response.completion_message
+ message = await convert_message_to_openai_dict_new(completion_message)
+ finish_reason = _convert_stop_reason_to_openai_finish_reason(completion_message.stop_reason)
+
+ choice = OpenAIChatCompletionChoice(
+ index=len(choices),
+ message=message,
+ finish_reason=finish_reason,
+ )
+ choices.append(choice)
+
+ return OpenAIChatCompletion(
+ id=f"chatcmpl-{uuid.uuid4()}",
+ choices=choices,
+ created=int(time.time()),
+ model=model,
+ object="chat.completion",
+ )
diff --git a/llama_stack/providers/utils/scheduler.py b/llama_stack/providers/utils/scheduler.py
new file mode 100644
index 000000000..d4cffe605
--- /dev/null
+++ b/llama_stack/providers/utils/scheduler.py
@@ -0,0 +1,265 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import abc
+import asyncio
+import functools
+import threading
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Any, Callable, Coroutine, Dict, Iterable, Tuple, TypeAlias
+
+from pydantic import BaseModel
+
+from llama_stack.log import get_logger
+
+logger = get_logger(name=__name__, category="scheduler")
+
+
+# TODO: revisit the list of possible statuses when defining a more coherent
+# Jobs API for all API flows; e.g. do we need new vs scheduled?
+class JobStatus(Enum):
+ new = "new"
+ scheduled = "scheduled"
+ running = "running"
+ failed = "failed"
+ completed = "completed"
+
+
+JobID: TypeAlias = str
+JobType: TypeAlias = str
+
+
+class JobArtifact(BaseModel):
+ type: JobType
+ name: str
+ # TODO: uri should be a reference to /files API; revisit when /files is implemented
+ uri: str | None = None
+ metadata: Dict[str, Any]
+
+
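+# A job handler is an async callable that receives three callbacks, in order: on_log_message, on_status_change, and on_artifact_collected.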
+JobHandler = Callable[
+ [Callable[[str], None], Callable[[JobStatus], None], Callable[[JobArtifact], None]], Coroutine[Any, Any, None]
+]
+
+
+LogMessage: TypeAlias = Tuple[datetime, str]
+
+
+_COMPLETED_STATUSES = {JobStatus.completed, JobStatus.failed}
+
+
+class Job:
+ def __init__(self, job_type: JobType, job_id: JobID, handler: JobHandler):
+ super().__init__()
+ self.id = job_id
+ self._type = job_type
+ self._handler = handler
+ self._artifacts: list[JobArtifact] = []
+ self._logs: list[LogMessage] = []
+ self._state_transitions: list[Tuple[datetime, JobStatus]] = [(datetime.now(timezone.utc), JobStatus.new)]
+
+ @property
+ def handler(self) -> JobHandler:
+ return self._handler
+
+ @property
+ def status(self) -> JobStatus:
+ return self._state_transitions[-1][1]
+
+ @status.setter
+ def status(self, status: JobStatus):
+ if status in _COMPLETED_STATUSES and self.status in _COMPLETED_STATUSES:
+ raise ValueError(f"Job is already in a completed state ({self.status})")
+ if self.status == status:
+ return
+ self._state_transitions.append((datetime.now(timezone.utc), status))
+
+ @property
+ def artifacts(self) -> list[JobArtifact]:
+ return self._artifacts
+
+ def register_artifact(self, artifact: JobArtifact) -> None:
+ self._artifacts.append(artifact)
+
+ def _find_state_transition_date(self, status: Iterable[JobStatus]) -> datetime | None:
+ for date, s in reversed(self._state_transitions):
+ if s in status:
+ return date
+ return None
+
+ @property
+ def scheduled_at(self) -> datetime | None:
+ return self._find_state_transition_date([JobStatus.scheduled])
+
+ @property
+ def started_at(self) -> datetime | None:
+ return self._find_state_transition_date([JobStatus.running])
+
+ @property
+ def completed_at(self) -> datetime | None:
+ return self._find_state_transition_date(_COMPLETED_STATUSES)
+
+ @property
+ def logs(self) -> list[LogMessage]:
+ return self._logs[:]
+
+ def append_log(self, message: LogMessage) -> None:
+ self._logs.append(message)
+
+ # TODO: implement
+ def cancel(self) -> None:
+ raise NotImplementedError
+
+
+class _SchedulerBackend(abc.ABC):
+ @abc.abstractmethod
+ def on_log_message_cb(self, job: Job, message: LogMessage) -> None:
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def on_status_change_cb(self, job: Job, status: JobStatus) -> None:
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def on_artifact_collected_cb(self, job: Job, artifact: JobArtifact) -> None:
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ async def shutdown(self) -> None:
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def schedule(
+ self,
+ job: Job,
+ on_log_message_cb: Callable[[str], None],
+ on_status_change_cb: Callable[[JobStatus], None],
+ on_artifact_collected_cb: Callable[[JobArtifact], None],
+ ) -> None:
+ raise NotImplementedError
+
+
+class _NaiveSchedulerBackend(_SchedulerBackend):
+ def __init__(self, timeout: int = 5):
+ self._timeout = timeout
+ self._loop = asyncio.new_event_loop()
+ # There may be performance implications of using threads due to Python
+ # GIL; may need to measure if it's a real problem though
+ self._thread = threading.Thread(target=self._run_loop, daemon=True)
+ self._thread.start()
+
+ def _run_loop(self) -> None:
+ asyncio.set_event_loop(self._loop)
+ self._loop.run_forever()
+
+ # When stopping the loop, give tasks a chance to finish
+ # TODO: should we explicitly inform jobs of pending stoppage?
+ for task in asyncio.all_tasks(self._loop):
+ self._loop.run_until_complete(task)
+ self._loop.close()
+
+ async def shutdown(self) -> None:
+ self._loop.call_soon_threadsafe(self._loop.stop)
+ self._thread.join()
+
+ # TODO: decouple scheduling and running the job
+ def schedule(
+ self,
+ job: Job,
+ on_log_message_cb: Callable[[str], None],
+ on_status_change_cb: Callable[[JobStatus], None],
+ on_artifact_collected_cb: Callable[[JobArtifact], None],
+ ) -> None:
+ async def do():
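+            # Run the handler on the backend's event loop; any exception is logged and marks the job as failed.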
+ try:
+ job.status = JobStatus.running
+ await job.handler(on_log_message_cb, on_status_change_cb, on_artifact_collected_cb)
+ except Exception as e:
+ on_log_message_cb(str(e))
+ job.status = JobStatus.failed
+ logger.exception(f"Job {job.id} failed.")
+
+ asyncio.run_coroutine_threadsafe(do(), self._loop)
+
+ def on_log_message_cb(self, job: Job, message: LogMessage) -> None:
+ pass
+
+ def on_status_change_cb(self, job: Job, status: JobStatus) -> None:
+ pass
+
+ def on_artifact_collected_cb(self, job: Job, artifact: JobArtifact) -> None:
+ pass
+
+
+_BACKENDS = {
+ "naive": _NaiveSchedulerBackend,
+}
+
+
+def _get_backend_impl(backend: str) -> _SchedulerBackend:
+ try:
+ return _BACKENDS[backend]()
+ except KeyError as e:
+ raise ValueError(f"Unknown backend {backend}") from e
+
+
+class Scheduler:
+ def __init__(self, backend: str = "naive"):
+        # TODO: if the server crashes, job states are lost; we need to persist jobs on disk
+ self._jobs: dict[JobID, Job] = {}
+ self._backend = _get_backend_impl(backend)
+
+ def _on_log_message_cb(self, job: Job, message: str) -> None:
+ msg = (datetime.now(timezone.utc), message)
+        # For now, until there's a better way to expose logs to users, also log
+        # messages to the console
+ logger.info(f"Job {job.id}: {message}")
+ job.append_log(msg)
+ self._backend.on_log_message_cb(job, msg)
+
+ def _on_status_change_cb(self, job: Job, status: JobStatus) -> None:
+ job.status = status
+ self._backend.on_status_change_cb(job, status)
+
+ def _on_artifact_collected_cb(self, job: Job, artifact: JobArtifact) -> None:
+ job.register_artifact(artifact)
+ self._backend.on_artifact_collected_cb(job, artifact)
+
+ def schedule(self, type_: JobType, job_id: JobID, handler: JobHandler) -> JobID:
+ job = Job(type_, job_id, handler)
+ if job.id in self._jobs:
+ raise ValueError(f"Job {job.id} already exists")
+
+ self._jobs[job.id] = job
+ job.status = JobStatus.scheduled
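+        # Hand the job to the backend with callbacks bound to this job so its logs, status changes, and artifacts are recorded.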
+ self._backend.schedule(
+ job,
+ functools.partial(self._on_log_message_cb, job),
+ functools.partial(self._on_status_change_cb, job),
+ functools.partial(self._on_artifact_collected_cb, job),
+ )
+
+ return job.id
+
+ def cancel(self, job_id: JobID) -> None:
+ self.get_job(job_id).cancel()
+
+ def get_job(self, job_id: JobID) -> Job:
+ try:
+ return self._jobs[job_id]
+ except KeyError as e:
+ raise ValueError(f"Job {job_id} not found") from e
+
+ def get_jobs(self, type_: JobType | None = None) -> list[Job]:
+ jobs = list(self._jobs.values())
+ if type_:
+ jobs = [job for job in jobs if job._type == type_]
+ return jobs
+
+ async def shutdown(self):
+ # TODO: also cancel jobs once implemented
+ await self._backend.shutdown()
diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/dev/run.yaml
index ea3b7252a..0dd056405 100644
--- a/llama_stack/templates/dev/run.yaml
+++ b/llama_stack/templates/dev/run.yaml
@@ -386,6 +386,16 @@ models:
provider_id: groq
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
+- metadata: {}
+ model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ model_type: llm
+- metadata: {}
+ model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ model_type: llm
- metadata: {}
model_id: groq/llama-4-maverick-17b-128e-instruct
provider_id: groq
@@ -396,6 +406,16 @@ models:
provider_id: groq
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
+- metadata: {}
+ model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ model_type: llm
+- metadata: {}
+ model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml
index f557e64fd..444452dcb 100644
--- a/llama_stack/templates/groq/run.yaml
+++ b/llama_stack/templates/groq/run.yaml
@@ -158,6 +158,16 @@ models:
provider_id: groq
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
+- metadata: {}
+ model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ model_type: llm
+- metadata: {}
+ model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ model_type: llm
- metadata: {}
model_id: groq/llama-4-maverick-17b-128e-instruct
provider_id: groq
@@ -168,6 +178,16 @@ models:
provider_id: groq
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
+- metadata: {}
+ model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ model_type: llm
+- metadata: {}
+ model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
diff --git a/llama_stack/templates/remote-vllm/doc_template.md b/llama_stack/templates/remote-vllm/doc_template.md
index efcdb62c6..fe50e9d49 100644
--- a/llama_stack/templates/remote-vllm/doc_template.md
+++ b/llama_stack/templates/remote-vllm/doc_template.md
@@ -28,7 +28,7 @@ The following environment variables can be configured:
## Setting up vLLM server
-In the following sections, we'll use either AMD and NVIDIA GPUs to serve as hardware accelerators for the vLLM
+In the following sections, we'll use AMD, NVIDIA, or Intel GPUs to serve as hardware accelerators for the vLLM
server, which acts as both the LLM inference provider and the safety provider. Note that vLLM also
[supports many other hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html) and
that we only use GPUs here for demonstration purposes.
@@ -149,6 +149,55 @@ docker run \
--port $SAFETY_PORT
```
+### Setting up vLLM server on Intel GPU
+
+Refer to [vLLM Documentation for XPU](https://docs.vllm.ai/en/v0.8.2/getting_started/installation/gpu.html?device=xpu) to get a vLLM endpoint. In addition to the vLLM-side setup guide, which covers installing vLLM from source or building your own vLLM Docker container, Intel provides a prebuilt vLLM container for systems with Intel GPUs supported by the PyTorch XPU backend:
+- [intel/vllm](https://hub.docker.com/r/intel/vllm)
+
+Here is a sample script to start a vLLM server locally via Docker using the Intel-provided container:
+
+```bash
+export INFERENCE_PORT=8000
+export INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct
+export ZE_AFFINITY_MASK=0
+
+docker run \
+ --pull always \
+ --device /dev/dri \
+ -v /dev/dri/by-path:/dev/dri/by-path \
+ -v ~/.cache/huggingface:/root/.cache/huggingface \
+ --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
+ --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \
+ -p $INFERENCE_PORT:$INFERENCE_PORT \
+ --ipc=host \
+ intel/vllm:xpu \
+ --gpu-memory-utilization 0.7 \
+ --model $INFERENCE_MODEL \
+ --port $INFERENCE_PORT
+```
+
+If you are using Llama Stack Safety / Shield APIs, you will also need to run another instance of vLLM with a corresponding safety model such as `meta-llama/Llama-Guard-3-1B`, using a script like:
+
+```bash
+export SAFETY_PORT=8081
+export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+export ZE_AFFINITY_MASK=1
+
+docker run \
+ --pull always \
+ --device /dev/dri \
+ -v /dev/dri/by-path:/dev/dri/by-path \
+ -v ~/.cache/huggingface:/root/.cache/huggingface \
+ --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
+ --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \
+ -p $SAFETY_PORT:$SAFETY_PORT \
+ --ipc=host \
+ intel/vllm:xpu \
+ --gpu-memory-utilization 0.7 \
+ --model $SAFETY_MODEL \
+ --port $SAFETY_PORT
+```
+
## Running Llama Stack
Now you are ready to run Llama Stack with vLLM as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image.
diff --git a/llama_stack/templates/verification/run.yaml b/llama_stack/templates/verification/run.yaml
index b6c2ca98d..454ecba5b 100644
--- a/llama_stack/templates/verification/run.yaml
+++ b/llama_stack/templates/verification/run.yaml
@@ -474,6 +474,16 @@ models:
provider_id: groq-openai-compat
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
+- metadata: {}
+ model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ provider_id: groq-openai-compat
+ provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ model_type: llm
+- metadata: {}
+ model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+ provider_id: groq-openai-compat
+ provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ model_type: llm
- metadata: {}
model_id: groq/llama-4-maverick-17b-128e-instruct
provider_id: groq-openai-compat
@@ -484,6 +494,16 @@ models:
provider_id: groq-openai-compat
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
+- metadata: {}
+ model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ provider_id: groq-openai-compat
+ provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ model_type: llm
+- metadata: {}
+ model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+ provider_id: groq-openai-compat
+ provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ model_type: llm
- metadata: {}
model_id: Meta-Llama-3.1-8B-Instruct
provider_id: sambanova-openai-compat
diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py
index 0905d5817..75b53100c 100644
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@@ -115,7 +115,7 @@ def test_openai_completion_streaming(openai_client, client_with_models, text_mod
stream=True,
max_tokens=50,
)
- streamed_content = [chunk.choices[0].text for chunk in response]
+ streamed_content = [chunk.choices[0].text or "" for chunk in response]
content_str = "".join(streamed_content).lower().strip()
assert len(content_str) > 10
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index 9c2281d85..88399198d 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -26,7 +26,12 @@ from openai.types.chat.chat_completion_chunk import (
)
from openai.types.model import Model as OpenAIModel
-from llama_stack.apis.inference import ToolChoice, ToolConfig
+from llama_stack.apis.inference import (
+ ChatCompletionRequest,
+ ToolChoice,
+ ToolConfig,
+ UserMessage,
+)
from llama_stack.apis.models import Model
from llama_stack.models.llama.datatypes import StopReason
from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
@@ -232,3 +237,14 @@ def test_chat_completion_doesnt_block_event_loop(caplog):
# above.
asyncio_warnings = [record.message for record in caplog.records if record.name == "asyncio"]
assert not asyncio_warnings
+
+
+@pytest.mark.asyncio
+async def test_get_params_empty_tools(vllm_inference_adapter):
+ request = ChatCompletionRequest(
+ tools=[],
+ model="test_model",
+ messages=[UserMessage(content="test")],
+ )
+ params = await vllm_inference_adapter._get_params(request)
+ assert "tools" not in params
diff --git a/tests/unit/providers/utils/test_scheduler.py b/tests/unit/providers/utils/test_scheduler.py
new file mode 100644
index 000000000..76f0da8ce
--- /dev/null
+++ b/tests/unit/providers/utils/test_scheduler.py
@@ -0,0 +1,120 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import asyncio
+
+import pytest
+
+from llama_stack.providers.utils.scheduler import JobStatus, Scheduler
+
+
+@pytest.mark.asyncio
+async def test_scheduler_unknown_backend():
+ with pytest.raises(ValueError):
+ Scheduler(backend="unknown")
+
+
+@pytest.mark.asyncio
+async def test_scheduler_naive():
+ sched = Scheduler()
+
+ # make sure the scheduler starts empty
+ with pytest.raises(ValueError):
+ sched.get_job("unknown")
+ assert sched.get_jobs() == []
+
+ called = False
+
+ # schedule a job that will exercise the handlers
+ async def job_handler(on_log, on_status, on_artifact):
+ nonlocal called
+ called = True
+ # exercise the handlers
+ on_log("test log1")
+ on_log("test log2")
+ on_artifact({"type": "type1", "path": "path1"})
+ on_artifact({"type": "type2", "path": "path2"})
+ on_status(JobStatus.completed)
+
+ job_id = "test_job_id"
+ job_type = "test_job_type"
+ sched.schedule(job_type, job_id, job_handler)
+
+ # make sure the job was properly registered
+ with pytest.raises(ValueError):
+ sched.get_job("unknown")
+ assert sched.get_job(job_id) is not None
+ assert sched.get_jobs() == [sched.get_job(job_id)]
+
+ assert sched.get_jobs("unknown") == []
+ assert sched.get_jobs(job_type) == [sched.get_job(job_id)]
+
+ # now shut the scheduler down and make sure the job ran
+ await sched.shutdown()
+
+ assert called
+
+ job = sched.get_job(job_id)
+ assert job is not None
+
+ assert job.status == JobStatus.completed
+
+ assert job.scheduled_at is not None
+ assert job.started_at is not None
+ assert job.completed_at is not None
+ assert job.scheduled_at < job.started_at < job.completed_at
+
+ assert job.artifacts == [
+ {"type": "type1", "path": "path1"},
+ {"type": "type2", "path": "path2"},
+ ]
+ assert [msg[1] for msg in job.logs] == ["test log1", "test log2"]
+ assert job.logs[0][0] < job.logs[1][0]
+
+
+@pytest.mark.asyncio
+async def test_scheduler_naive_handler_raises():
+ sched = Scheduler()
+
+ async def failing_job_handler(on_log, on_status, on_artifact):
+ on_status(JobStatus.running)
+ raise ValueError("test error")
+
+ job_id = "test_job_id1"
+ job_type = "test_job_type"
+ sched.schedule(job_type, job_id, failing_job_handler)
+
+ job = sched.get_job(job_id)
+ assert job is not None
+
+ # confirm the exception made the job transition to failed state, even
+ # though it was set to `running` before the error
+ for _ in range(10):
+ if job.status == JobStatus.failed:
+ break
+ await asyncio.sleep(0.1)
+ assert job.status == JobStatus.failed
+
+ # confirm that the raised error got registered in log
+ assert job.logs[0][1] == "test error"
+
+ # even after failed job, we can schedule another one
+ called = False
+
+ async def successful_job_handler(on_log, on_status, on_artifact):
+ nonlocal called
+ called = True
+ on_status(JobStatus.completed)
+
+ job_id = "test_job_id2"
+ sched.schedule(job_type, job_id, successful_job_handler)
+
+ await sched.shutdown()
+
+ assert called
+ job = sched.get_job(job_id)
+ assert job is not None
+ assert job.status == JobStatus.completed
diff --git a/tests/verifications/REPORT.md b/tests/verifications/REPORT.md
index 2309c6404..2dd0af41b 100644
--- a/tests/verifications/REPORT.md
+++ b/tests/verifications/REPORT.md
@@ -1,6 +1,6 @@
# Test Results Report
-*Generated on: 2025-04-10 16:48:18*
+*Generated on: 2025-04-14 18:11:37*
*This report was generated by running `python tests/verifications/generate_report.py`*
@@ -15,15 +15,15 @@
| Provider | Pass Rate | Tests Passed | Total Tests |
| --- | --- | --- | --- |
-| Together | 64.7% | 22 | 34 |
-| Fireworks | 82.4% | 28 | 34 |
-| Openai | 100.0% | 24 | 24 |
+| Together | 48.7% | 37 | 76 |
+| Fireworks | 47.4% | 36 | 76 |
+| Openai | 100.0% | 52 | 52 |
## Together
-*Tests run on: 2025-04-10 16:46:35*
+*Tests run on: 2025-04-14 18:08:14*
```bash
# Run all tests for this provider:
@@ -48,19 +48,33 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=togethe
| test_chat_non_streaming_basic (earth) | ✅ | ✅ | ✅ |
| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | ✅ |
| test_chat_non_streaming_image | ⚪ | ✅ | ✅ |
+| test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | ✅ | ✅ |
+| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ❌ | ✅ | ✅ |
+| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ | ❌ | ✅ |
+| test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | ❌ | ❌ | ❌ |
+| test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ | ✅ | ✅ |
| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ | ✅ |
| test_chat_non_streaming_structured_output (math) | ✅ | ✅ | ✅ |
| test_chat_non_streaming_tool_calling | ✅ | ✅ | ✅ |
+| test_chat_non_streaming_tool_choice_none | ❌ | ❌ | ❌ |
+| test_chat_non_streaming_tool_choice_required | ✅ | ✅ | ✅ |
| test_chat_streaming_basic (earth) | ✅ | ❌ | ❌ |
| test_chat_streaming_basic (saturn) | ✅ | ❌ | ❌ |
| test_chat_streaming_image | ⚪ | ❌ | ❌ |
+| test_chat_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | ❌ | ❌ |
+| test_chat_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ❌ | ❌ | ❌ |
+| test_chat_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ❌ | ❌ | ❌ |
+| test_chat_streaming_multi_turn_tool_calling (text_then_weather_tool) | ❌ | ❌ | ❌ |
+| test_chat_streaming_multi_turn_tool_calling (weather_tool_then_text) | ❌ | ❌ | ❌ |
| test_chat_streaming_structured_output (calendar) | ✅ | ❌ | ❌ |
| test_chat_streaming_structured_output (math) | ✅ | ❌ | ❌ |
| test_chat_streaming_tool_calling | ✅ | ❌ | ❌ |
+| test_chat_streaming_tool_choice_none | ❌ | ❌ | ❌ |
+| test_chat_streaming_tool_choice_required | ✅ | ❌ | ❌ |
## Fireworks
-*Tests run on: 2025-04-10 16:44:44*
+*Tests run on: 2025-04-14 18:04:06*
```bash
# Run all tests for this provider:
@@ -85,19 +99,33 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=firewor
| test_chat_non_streaming_basic (earth) | ✅ | ✅ | ✅ |
| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | ✅ |
| test_chat_non_streaming_image | ⚪ | ✅ | ✅ |
+| test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | ❌ | ❌ | ❌ |
+| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ❌ | ❌ | ❌ |
+| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ❌ | ❌ | ❌ |
+| test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | ❌ | ❌ | ❌ |
+| test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | ❌ | ❌ | ❌ |
| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ | ✅ |
| test_chat_non_streaming_structured_output (math) | ✅ | ✅ | ✅ |
| test_chat_non_streaming_tool_calling | ❌ | ❌ | ❌ |
+| test_chat_non_streaming_tool_choice_none | ✅ | ✅ | ✅ |
+| test_chat_non_streaming_tool_choice_required | ✅ | ❌ | ❌ |
| test_chat_streaming_basic (earth) | ✅ | ✅ | ✅ |
| test_chat_streaming_basic (saturn) | ✅ | ✅ | ✅ |
| test_chat_streaming_image | ⚪ | ✅ | ✅ |
+| test_chat_streaming_multi_turn_tool_calling (add_product_tool) | ❌ | ❌ | ❌ |
+| test_chat_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ❌ | ❌ | ❌ |
+| test_chat_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ❌ | ❌ | ❌ |
+| test_chat_streaming_multi_turn_tool_calling (text_then_weather_tool) | ❌ | ❌ | ❌ |
+| test_chat_streaming_multi_turn_tool_calling (weather_tool_then_text) | ❌ | ❌ | ❌ |
| test_chat_streaming_structured_output (calendar) | ✅ | ✅ | ✅ |
| test_chat_streaming_structured_output (math) | ✅ | ✅ | ✅ |
| test_chat_streaming_tool_calling | ❌ | ❌ | ❌ |
+| test_chat_streaming_tool_choice_none | ✅ | ✅ | ✅ |
+| test_chat_streaming_tool_choice_required | ✅ | ❌ | ❌ |
## Openai
-*Tests run on: 2025-04-10 16:47:28*
+*Tests run on: 2025-04-14 18:09:51*
```bash
# Run all tests for this provider:
@@ -121,12 +149,26 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai
| test_chat_non_streaming_basic (earth) | ✅ | ✅ |
| test_chat_non_streaming_basic (saturn) | ✅ | ✅ |
| test_chat_non_streaming_image | ✅ | ✅ |
+| test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | ✅ |
+| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ | ✅ |
+| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ | ✅ |
+| test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | ✅ | ✅ |
+| test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ | ✅ |
| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ |
| test_chat_non_streaming_structured_output (math) | ✅ | ✅ |
| test_chat_non_streaming_tool_calling | ✅ | ✅ |
+| test_chat_non_streaming_tool_choice_none | ✅ | ✅ |
+| test_chat_non_streaming_tool_choice_required | ✅ | ✅ |
| test_chat_streaming_basic (earth) | ✅ | ✅ |
| test_chat_streaming_basic (saturn) | ✅ | ✅ |
| test_chat_streaming_image | ✅ | ✅ |
+| test_chat_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | ✅ |
+| test_chat_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ | ✅ |
+| test_chat_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ | ✅ |
+| test_chat_streaming_multi_turn_tool_calling (text_then_weather_tool) | ✅ | ✅ |
+| test_chat_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ | ✅ |
| test_chat_streaming_structured_output (calendar) | ✅ | ✅ |
| test_chat_streaming_structured_output (math) | ✅ | ✅ |
| test_chat_streaming_tool_calling | ✅ | ✅ |
+| test_chat_streaming_tool_choice_none | ✅ | ✅ |
+| test_chat_streaming_tool_choice_required | ✅ | ✅ |
diff --git a/tests/verifications/conf/fireworks-llama-stack.yaml b/tests/verifications/conf/fireworks-llama-stack.yaml
new file mode 100644
index 000000000..d91443dd9
--- /dev/null
+++ b/tests/verifications/conf/fireworks-llama-stack.yaml
@@ -0,0 +1,14 @@
+base_url: http://localhost:8321/v1/openai/v1
+api_key_var: FIREWORKS_API_KEY
+models:
+- fireworks/llama-v3p3-70b-instruct
+- fireworks/llama4-scout-instruct-basic
+- fireworks/llama4-maverick-instruct-basic
+model_display_names:
+ fireworks/llama-v3p3-70b-instruct: Llama-3.3-70B-Instruct
+ fireworks/llama4-scout-instruct-basic: Llama-4-Scout-Instruct
+ fireworks/llama4-maverick-instruct-basic: Llama-4-Maverick-Instruct
+test_exclusions:
+ fireworks/llama-v3p3-70b-instruct:
+ - test_chat_non_streaming_image
+ - test_chat_streaming_image
diff --git a/tests/verifications/conf/groq-llama-stack.yaml b/tests/verifications/conf/groq-llama-stack.yaml
new file mode 100644
index 000000000..fd5e9abec
--- /dev/null
+++ b/tests/verifications/conf/groq-llama-stack.yaml
@@ -0,0 +1,14 @@
+base_url: http://localhost:8321/v1/openai/v1
+api_key_var: GROQ_API_KEY
+models:
+- groq/llama-3.3-70b-versatile
+- groq/llama-4-scout-17b-16e-instruct
+- groq/llama-4-maverick-17b-128e-instruct
+model_display_names:
+ groq/llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
+ groq/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
+ groq/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
+test_exclusions:
+ groq/llama-3.3-70b-versatile:
+ - test_chat_non_streaming_image
+ - test_chat_streaming_image
diff --git a/tests/verifications/conf/groq.yaml b/tests/verifications/conf/groq.yaml
index 7871036dc..76b1244ae 100644
--- a/tests/verifications/conf/groq.yaml
+++ b/tests/verifications/conf/groq.yaml
@@ -2,12 +2,12 @@ base_url: https://api.groq.com/openai/v1
api_key_var: GROQ_API_KEY
models:
- llama-3.3-70b-versatile
-- llama-4-scout-17b-16e-instruct
-- llama-4-maverick-17b-128e-instruct
+- meta-llama/llama-4-scout-17b-16e-instruct
+- meta-llama/llama-4-maverick-17b-128e-instruct
model_display_names:
llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
- llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
- llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
+ meta-llama/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
+ meta-llama/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
test_exclusions:
llama-3.3-70b-versatile:
- test_chat_non_streaming_image
diff --git a/tests/verifications/conf/openai-llama-stack.yaml b/tests/verifications/conf/openai-llama-stack.yaml
new file mode 100644
index 000000000..de35439ae
--- /dev/null
+++ b/tests/verifications/conf/openai-llama-stack.yaml
@@ -0,0 +1,9 @@
+base_url: http://localhost:8321/v1/openai/v1
+api_key_var: OPENAI_API_KEY
+models:
+- openai/gpt-4o
+- openai/gpt-4o-mini
+model_display_names:
+ openai/gpt-4o: gpt-4o
+ openai/gpt-4o-mini: gpt-4o-mini
+test_exclusions: {}
diff --git a/tests/verifications/conf/together-llama-stack.yaml b/tests/verifications/conf/together-llama-stack.yaml
new file mode 100644
index 000000000..e49d82604
--- /dev/null
+++ b/tests/verifications/conf/together-llama-stack.yaml
@@ -0,0 +1,14 @@
+base_url: http://localhost:8321/v1/openai/v1
+api_key_var: TOGETHER_API_KEY
+models:
+- together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+- together/meta-llama/Llama-4-Scout-17B-16E-Instruct
+- together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+model_display_names:
+ together/meta-llama/Llama-3.3-70B-Instruct-Turbo: Llama-3.3-70B-Instruct
+ together/meta-llama/Llama-4-Scout-17B-16E-Instruct: Llama-4-Scout-Instruct
+ together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8: Llama-4-Maverick-Instruct
+test_exclusions:
+ together/meta-llama/Llama-3.3-70B-Instruct-Turbo:
+ - test_chat_non_streaming_image
+ - test_chat_streaming_image
diff --git a/tests/verifications/generate_report.py b/tests/verifications/generate_report.py
index 6a7c39ee2..b39c3fd19 100755
--- a/tests/verifications/generate_report.py
+++ b/tests/verifications/generate_report.py
@@ -67,7 +67,17 @@ RESULTS_DIR.mkdir(exist_ok=True)
# Maximum number of test result files to keep per provider
MAX_RESULTS_PER_PROVIDER = 1
-PROVIDER_ORDER = ["together", "fireworks", "groq", "cerebras", "openai"]
+PROVIDER_ORDER = [
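+    # Providers suffixed with "-llama-stack" are exercised through a local Llama Stack's OpenAI-compatible endpoint.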
+ "together",
+ "fireworks",
+ "groq",
+ "cerebras",
+ "openai",
+ "together-llama-stack",
+ "fireworks-llama-stack",
+ "groq-llama-stack",
+ "openai-llama-stack",
+]
VERIFICATION_CONFIG = _load_all_verification_configs()
diff --git a/tests/verifications/openai-api-verification-run.yaml b/tests/verifications/openai-api-verification-run.yaml
new file mode 100644
index 000000000..71885d058
--- /dev/null
+++ b/tests/verifications/openai-api-verification-run.yaml
@@ -0,0 +1,146 @@
+version: '2'
+image_name: openai-api-verification
+apis:
+- inference
+- telemetry
+- tool_runtime
+- vector_io
+providers:
+ inference:
+ - provider_id: together
+ provider_type: remote::together
+ config:
+ url: https://api.together.xyz/v1
+ api_key: ${env.TOGETHER_API_KEY:}
+ - provider_id: fireworks
+ provider_type: remote::fireworks
+ config:
+ url: https://api.fireworks.ai/inference/v1
+ api_key: ${env.FIREWORKS_API_KEY}
+ - provider_id: groq
+ provider_type: remote::groq
+ config:
+ url: https://api.groq.com
+ api_key: ${env.GROQ_API_KEY}
+ - provider_id: openai
+ provider_type: remote::openai
+ config:
+ url: https://api.openai.com/v1
+ api_key: ${env.OPENAI_API_KEY:}
+ - provider_id: sentence-transformers
+ provider_type: inline::sentence-transformers
+ config: {}
+ vector_io:
+ - provider_id: faiss
+ provider_type: inline::faiss
+ config:
+ kvstore:
+ type: sqlite
+ namespace: null
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/faiss_store.db
+ telemetry:
+ - provider_id: meta-reference
+ provider_type: inline::meta-reference
+ config:
+ service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+ sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+ sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/openai/trace_store.db}
+ tool_runtime:
+ - provider_id: brave-search
+ provider_type: remote::brave-search
+ config:
+ api_key: ${env.BRAVE_SEARCH_API_KEY:}
+ max_results: 3
+ - provider_id: tavily-search
+ provider_type: remote::tavily-search
+ config:
+ api_key: ${env.TAVILY_SEARCH_API_KEY:}
+ max_results: 3
+ - provider_id: code-interpreter
+ provider_type: inline::code-interpreter
+ config: {}
+ - provider_id: rag-runtime
+ provider_type: inline::rag-runtime
+ config: {}
+ - provider_id: model-context-protocol
+ provider_type: remote::model-context-protocol
+ config: {}
+ - provider_id: wolfram-alpha
+ provider_type: remote::wolfram-alpha
+ config:
+ api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
+metadata_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/registry.db
+models:
+- metadata: {}
+ model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+ provider_id: together
+ provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+ model_type: llm
+- metadata: {}
+ model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct
+ provider_id: together
+ provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+ model_type: llm
+- metadata: {}
+ model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+ provider_id: together
+ provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+ model_type: llm
+- metadata: {}
+ model_id: fireworks/llama-v3p3-70b-instruct
+ provider_id: fireworks
+ provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+ model_type: llm
+- metadata: {}
+ model_id: fireworks/llama4-scout-instruct-basic
+ provider_id: fireworks
+ provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
+ model_type: llm
+- metadata: {}
+ model_id: fireworks/llama4-maverick-instruct-basic
+ provider_id: fireworks
+ provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
+ model_type: llm
+- metadata: {}
+ model_id: groq/llama-3.3-70b-versatile
+ provider_id: groq
+ provider_model_id: groq/llama-3.3-70b-versatile
+ model_type: llm
+- metadata: {}
+ model_id: groq/llama-4-scout-17b-16e-instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ model_type: llm
+- metadata: {}
+ model_id: groq/llama-4-maverick-17b-128e-instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ model_type: llm
+- metadata: {}
+ model_id: openai/gpt-4o
+ provider_id: openai
+ provider_model_id: openai/gpt-4o
+ model_type: llm
+- metadata: {}
+ model_id: openai/gpt-4o-mini
+ provider_id: openai
+ provider_model_id: openai/gpt-4o-mini
+ model_type: llm
+shields: []
+vector_dbs: []
+datasets: []
+scoring_fns: []
+benchmarks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+ provider_id: tavily-search
+- toolgroup_id: builtin::rag
+ provider_id: rag-runtime
+- toolgroup_id: builtin::code_interpreter
+ provider_id: code-interpreter
+- toolgroup_id: builtin::wolfram_alpha
+ provider_id: wolfram-alpha
+server:
+ port: 8321
diff --git a/tests/verifications/openai_api/fixtures/fixtures.py b/tests/verifications/openai_api/fixtures/fixtures.py
index 4f8c2e017..940b99b2a 100644
--- a/tests/verifications/openai_api/fixtures/fixtures.py
+++ b/tests/verifications/openai_api/fixtures/fixtures.py
@@ -99,6 +99,9 @@ def model_mapping(provider, providers_model_mapping):
@pytest.fixture
def openai_client(base_url, api_key):
+ # Simplify running against a local Llama Stack
+ if "localhost" in base_url and not api_key:
+ api_key = "empty"
return OpenAI(
base_url=base_url,
api_key=api_key,
diff --git a/tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml b/tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml
index 78ea8245d..1ace76e34 100644
--- a/tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml
+++ b/tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml
@@ -131,3 +131,221 @@ test_tool_calling:
type: object
type: function
output: get_weather_tool_call
+
+test_chat_multi_turn_tool_calling:
+ test_name: test_chat_multi_turn_tool_calling
+ test_params:
+ case:
+ - case_id: "text_then_weather_tool"
+ input:
+ messages:
+ - - role: user
+ content: "What's the name of the Sun in latin?"
+ - - role: user
+ content: "What's the weather like in San Francisco?"
+ tools:
+ - function:
+ description: Get the current weather
+ name: get_weather
+ parameters:
+ type: object
+ properties:
+ location:
+ description: "The city and state (both required), e.g. San Francisco, CA."
+ type: string
+ required: ["location"]
+ type: function
+ tool_responses:
+ - response: "{'response': '70 degrees and foggy'}"
+ expected:
+ - num_tool_calls: 0
+ answer: ["sol"]
+ - num_tool_calls: 1
+ tool_name: get_weather
+ tool_arguments:
+ location: "San Francisco, CA"
+ - num_tool_calls: 0
+ answer: ["foggy", "70 degrees"]
+ - case_id: "weather_tool_then_text"
+ input:
+ messages:
+ - - role: user
+ content: "What's the weather like in San Francisco?"
+ tools:
+ - function:
+ description: Get the current weather
+ name: get_weather
+ parameters:
+ type: object
+ properties:
+ location:
+ description: "The city and state (both required), e.g. San Francisco, CA."
+ type: string
+ required: ["location"]
+ type: function
+ tool_responses:
+ - response: "{'response': '70 degrees and foggy'}"
+ expected:
+ - num_tool_calls: 1
+ tool_name: get_weather
+ tool_arguments:
+ location: "San Francisco, CA"
+ - num_tool_calls: 0
+ answer: ["foggy", "70 degrees"]
+ - case_id: "add_product_tool"
+ input:
+ messages:
+ - - role: user
+ content: "Please add a new product with name 'Widget', price 19.99, in stock, and tags ['new', 'sale'] and give me the product id."
+ tools:
+ - function:
+ description: Add a new product
+ name: addProduct
+ parameters:
+ type: object
+ properties:
+ name:
+ description: "Name of the product"
+ type: string
+ price:
+ description: "Price of the product"
+ type: number
+ inStock:
+ description: "Availability status of the product."
+ type: boolean
+ tags:
+ description: "List of product tags"
+ type: array
+ items:
+ type: string
+ required: ["name", "price", "inStock"]
+ type: function
+ tool_responses:
+ - response: "{'response': 'Successfully added product with id: 123'}"
+ expected:
+ - num_tool_calls: 1
+ tool_name: addProduct
+ tool_arguments:
+ name: "Widget"
+ price: 19.99
+ inStock: true
+ tags:
+ - "new"
+ - "sale"
+ - num_tool_calls: 0
+ answer: ["123", "product id: 123"]
+ - case_id: "get_then_create_event_tool"
+ input:
+ messages:
+ - - role: system
+ content: "Todays date is 2025-03-01."
+ - role: user
+ content: "Do i have any meetings on March 3rd at 10 am? Yes or no?"
+ - - role: user
+                content: "Alright then, create an event named 'Team Building', scheduled for that same time, in the 'Main Conference Room' and add Alice, Bob, Charlie to it. Give me the created event id."
+ tools:
+ - function:
+ description: Create a new event
+ name: create_event
+ parameters:
+ type: object
+ properties:
+ name:
+ description: "Name of the event"
+ type: string
+ date:
+ description: "Date of the event in ISO format"
+ type: string
+ time:
+ description: "Event Time (HH:MM)"
+ type: string
+ location:
+ description: "Location of the event"
+ type: string
+ participants:
+ description: "List of participant names"
+ type: array
+ items:
+ type: string
+ required: ["name", "date", "time", "location", "participants"]
+ type: function
+ - function:
+ description: Get an event by date and time
+ name: get_event
+ parameters:
+ type: object
+ properties:
+ date:
+ description: "Date of the event in ISO format"
+ type: string
+ time:
+ description: "Event Time (HH:MM)"
+ type: string
+ required: ["date", "time"]
+ type: function
+ tool_responses:
+ - response: "{'response': 'No events found for 2025-03-03 at 10:00'}"
+ - response: "{'response': 'Successfully created new event with id: e_123'}"
+ expected:
+ - num_tool_calls: 1
+ tool_name: get_event
+ tool_arguments:
+ date: "2025-03-03"
+ time: "10:00"
+ - num_tool_calls: 0
+ answer: ["no", "no events found", "no meetings"]
+ - num_tool_calls: 1
+ tool_name: create_event
+ tool_arguments:
+ name: "Team Building"
+ date: "2025-03-03"
+ time: "10:00"
+ location: "Main Conference Room"
+ participants:
+ - "Alice"
+ - "Bob"
+ - "Charlie"
+ - num_tool_calls: 0
+ answer: ["e_123", "event id: e_123"]
+ - case_id: "compare_monthly_expense_tool"
+ input:
+ messages:
+ - - role: system
+ content: "Todays date is 2025-03-01."
+ - role: user
+ content: "what was my monthly expense in Jan of this year?"
+ - - role: user
+ content: "Was it less than Feb of last year? Only answer with yes or no."
+ tools:
+ - function:
+ description: Get monthly expense summary
+ name: getMonthlyExpenseSummary
+ parameters:
+ type: object
+ properties:
+ month:
+ description: "Month of the year (1-12)"
+ type: integer
+ year:
+ description: "Year"
+ type: integer
+ required: ["month", "year"]
+ type: function
+ tool_responses:
+ - response: "{'response': 'Total expenses for January 2025: $1000'}"
+ - response: "{'response': 'Total expenses for February 2024: $2000'}"
+ expected:
+ - num_tool_calls: 1
+ tool_name: getMonthlyExpenseSummary
+ tool_arguments:
+ month: 1
+ year: 2025
+ - num_tool_calls: 0
+ answer: ["1000", "$1,000", "1,000"]
+ - num_tool_calls: 1
+ tool_name: getMonthlyExpenseSummary
+ tool_arguments:
+ month: 2
+ year: 2024
+ - num_tool_calls: 0
+ answer: ["yes"]
diff --git a/tests/verifications/openai_api/test_chat_completion.py b/tests/verifications/openai_api/test_chat_completion.py
index 6aee29c3a..62a223afb 100644
--- a/tests/verifications/openai_api/test_chat_completion.py
+++ b/tests/verifications/openai_api/test_chat_completion.py
@@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+import copy
import json
import re
from typing import Any
@@ -243,43 +244,294 @@ def test_chat_streaming_tool_calling(request, openai_client, model, provider, ve
stream=True,
)
- # Accumulate partial tool_calls here
- tool_calls_buffer = {}
- current_id = None
- # Process streaming chunks
- for chunk in stream:
- choice = chunk.choices[0]
- delta = choice.delta
-
- if delta.tool_calls is None:
- continue
-
- for tool_call_delta in delta.tool_calls:
- if tool_call_delta.id:
- current_id = tool_call_delta.id
- call_id = current_id
- func_delta = tool_call_delta.function
-
- if call_id not in tool_calls_buffer:
- tool_calls_buffer[call_id] = {
- "id": call_id,
- "type": tool_call_delta.type,
- "name": func_delta.name,
- "arguments": "",
- }
-
- if func_delta.arguments:
- tool_calls_buffer[call_id]["arguments"] += func_delta.arguments
-
+ _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)
assert len(tool_calls_buffer) == 1
- for call in tool_calls_buffer.values():
+ for call in tool_calls_buffer:
assert len(call["id"]) > 0
- assert call["name"] == "get_weather"
+ function = call["function"]
+ assert function["name"] == "get_weather"
- args_dict = json.loads(call["arguments"])
+ args_dict = json.loads(function["arguments"])
assert "san francisco" in args_dict["location"].lower()
+@pytest.mark.parametrize(
+ "case",
+ chat_completion_test_cases["test_tool_calling"]["test_params"]["case"], # Reusing existing case for now
+ ids=case_id_generator,
+)
+def test_chat_non_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):
+ test_name_base = get_base_test_name(request)
+ if should_skip_test(verification_config, provider, model, test_name_base):
+ pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+ response = openai_client.chat.completions.create(
+ model=model,
+ messages=case["input"]["messages"],
+ tools=case["input"]["tools"],
+ tool_choice="required", # Force tool call
+ stream=False,
+ )
+ print(response)
+
+ assert response.choices[0].message.role == "assistant"
+ assert len(response.choices[0].message.tool_calls) > 0, "Expected tool call when tool_choice='required'"
+ expected_tool_name = case["input"]["tools"][0]["function"]["name"]
+ assert response.choices[0].message.tool_calls[0].function.name == expected_tool_name
+
+
+@pytest.mark.parametrize(
+ "case",
+ chat_completion_test_cases["test_tool_calling"]["test_params"]["case"], # Reusing existing case for now
+ ids=case_id_generator,
+)
+def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):
+ test_name_base = get_base_test_name(request)
+ if should_skip_test(verification_config, provider, model, test_name_base):
+ pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+ stream = openai_client.chat.completions.create(
+ model=model,
+ messages=case["input"]["messages"],
+ tools=case["input"]["tools"],
+ tool_choice="required", # Force tool call
+ stream=True,
+ )
+
+ _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)
+
+ assert len(tool_calls_buffer) > 0, "Expected tool call when tool_choice='required'"
+ expected_tool_name = case["input"]["tools"][0]["function"]["name"]
+ assert any(call["function"]["name"] == expected_tool_name for call in tool_calls_buffer), (
+ f"Expected tool call '{expected_tool_name}' not found in stream"
+ )
+
+
+@pytest.mark.parametrize(
+ "case",
+ chat_completion_test_cases["test_tool_calling"]["test_params"]["case"], # Reusing existing case for now
+ ids=case_id_generator,
+)
+def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):
+ test_name_base = get_base_test_name(request)
+ if should_skip_test(verification_config, provider, model, test_name_base):
+ pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+ response = openai_client.chat.completions.create(
+ model=model,
+ messages=case["input"]["messages"],
+ tools=case["input"]["tools"],
+ tool_choice="none",
+ stream=False,
+ )
+
+ assert response.choices[0].message.role == "assistant"
+ assert response.choices[0].message.tool_calls is None, "Expected no tool calls when tool_choice='none'"
+ assert response.choices[0].message.content is not None, "Expected content when tool_choice='none'"
+
+
+@pytest.mark.parametrize(
+ "case",
+ chat_completion_test_cases["test_tool_calling"]["test_params"]["case"], # Reusing existing case for now
+ ids=case_id_generator,
+)
+def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):
+ test_name_base = get_base_test_name(request)
+ if should_skip_test(verification_config, provider, model, test_name_base):
+ pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+ stream = openai_client.chat.completions.create(
+ model=model,
+ messages=case["input"]["messages"],
+ tools=case["input"]["tools"],
+ tool_choice="none",
+ stream=True,
+ )
+
+ content = ""
+ for chunk in stream:
+ delta = chunk.choices[0].delta
+ if delta.content:
+ content += delta.content
+ assert not delta.tool_calls, "Expected no tool call chunks when tool_choice='none'"
+
+ assert len(content) > 0, "Expected content when tool_choice='none'"
+
+
+@pytest.mark.parametrize(
+ "case",
+ chat_completion_test_cases.get("test_chat_multi_turn_tool_calling", {}).get("test_params", {}).get("case", []),
+ ids=case_id_generator,
+)
+def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):
+ """
+    Test cases for multi-turn tool calling (non-streaming).
+    For each turn, the assistant's tool calls are asserted, tool responses
+    provided by the test case are fed back, and the final response content is asserted.
+ """
+
+ test_name_base = get_base_test_name(request)
+ if should_skip_test(verification_config, provider, model, test_name_base):
+ pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+ # Create a copy of the messages list to avoid modifying the original
+ messages = []
+ tools = case["input"]["tools"]
+ # Use deepcopy to prevent modification across runs/parametrization
+ expected_results = copy.deepcopy(case["expected"])
+ tool_responses = copy.deepcopy(case.get("tool_responses", []))
+ input_messages_turns = copy.deepcopy(case["input"]["messages"])
+
+    # Keep looping while either:
+    # 1. there are more input message turns to process, or
+    # 2. the last message is a tool response that still needs an assistant reply
+ while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1]["role"] == "tool"):
+        # do not take new messages if the last message is a tool response
+ if len(messages) == 0 or messages[-1]["role"] != "tool":
+ new_messages = input_messages_turns.pop(0)
+ # Ensure new_messages is a list of message objects
+ if isinstance(new_messages, list):
+ messages.extend(new_messages)
+ else:
+ # If it's a single message object, add it directly
+ messages.append(new_messages)
+
+ # --- API Call ---
+ response = openai_client.chat.completions.create(
+ model=model,
+ messages=messages,
+ tools=tools,
+ stream=False,
+ )
+
+ # --- Process Response ---
+ assistant_message = response.choices[0].message
+ messages.append(assistant_message.model_dump(exclude_unset=True))
+
+ assert assistant_message.role == "assistant"
+
+ # Get the expected result data
+ expected = expected_results.pop(0)
+ num_tool_calls = expected["num_tool_calls"]
+
+ # --- Assertions based on expected result ---
+ assert len(assistant_message.tool_calls or []) == num_tool_calls, (
+ f"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}"
+ )
+
+ if num_tool_calls > 0:
+ tool_call = assistant_message.tool_calls[0]
+ assert tool_call.function.name == expected["tool_name"], (
+ f"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'"
+ )
+ # Parse the JSON string arguments before comparing
+ actual_arguments = json.loads(tool_call.function.arguments)
+ assert actual_arguments == expected["tool_arguments"], (
+ f"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'"
+ )
+
+ # Prepare and append the tool response for the next turn
+ tool_response = tool_responses.pop(0)
+ messages.append(
+ {
+ "role": "tool",
+ "tool_call_id": tool_call.id,
+ "content": tool_response["response"],
+ }
+ )
+ else:
+ assert assistant_message.content is not None, "Expected content, but none received."
+            expected_answers = expected["answer"]  # list of acceptable answer substrings
+ content_lower = assistant_message.content.lower()
+ assert any(ans.lower() in content_lower for ans in expected_answers), (
+ f"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'"
+ )
+
+
+@pytest.mark.parametrize(
+ "case",
+ chat_completion_test_cases.get("test_chat_multi_turn_tool_calling", {}).get("test_params", {}).get("case", []),
+ ids=case_id_generator,
+)
+def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):
+ """ """
+ test_name_base = get_base_test_name(request)
+ if should_skip_test(verification_config, provider, model, test_name_base):
+ pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+ messages = []
+ tools = case["input"]["tools"]
+ expected_results = copy.deepcopy(case["expected"])
+ tool_responses = copy.deepcopy(case.get("tool_responses", []))
+ input_messages_turns = copy.deepcopy(case["input"]["messages"])
+
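+    # Same turn-handling loop as the non-streaming test above: pull in the next input
+    # turn unless the last message is a tool response still awaiting an assistant reply.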
+ while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1]["role"] == "tool"):
+ if len(messages) == 0 or messages[-1]["role"] != "tool":
+ new_messages = input_messages_turns.pop(0)
+ if isinstance(new_messages, list):
+ messages.extend(new_messages)
+ else:
+ messages.append(new_messages)
+
+ # --- API Call (Streaming) ---
+ stream = openai_client.chat.completions.create(
+ model=model,
+ messages=messages,
+ tools=tools,
+ stream=True,
+ )
+
+ # --- Process Stream ---
+ accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)
+
+ # --- Construct Assistant Message for History ---
+ assistant_message_dict = {"role": "assistant"}
+ if accumulated_content:
+ assistant_message_dict["content"] = accumulated_content
+ if accumulated_tool_calls:
+ assistant_message_dict["tool_calls"] = accumulated_tool_calls
+
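+        # The reconstructed dict mirrors an OpenAI assistant message (optional "content"
+        # plus "tool_calls"), so it can be replayed as conversation history next turn.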
+ messages.append(assistant_message_dict)
+
+ # --- Assertions ---
+ expected = expected_results.pop(0)
+ num_tool_calls = expected["num_tool_calls"]
+
+ assert len(accumulated_tool_calls or []) == num_tool_calls, (
+ f"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}"
+ )
+
+ if num_tool_calls > 0:
+ # Use the first accumulated tool call for assertion
+ tool_call = accumulated_tool_calls[0]
+ assert tool_call["function"]["name"] == expected["tool_name"], (
+ f"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'"
+ )
+ # Parse the accumulated arguments string for comparison
+ actual_arguments = json.loads(tool_call["function"]["arguments"])
+ assert actual_arguments == expected["tool_arguments"], (
+ f"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'"
+ )
+
+ # Prepare and append the tool response for the next turn
+ tool_response = tool_responses.pop(0)
+ messages.append(
+ {
+ "role": "tool",
+ "tool_call_id": tool_call["id"],
+ "content": tool_response["response"],
+ }
+ )
+ else:
+ assert accumulated_content is not None and accumulated_content != "", "Expected content, but none received."
+ expected_answers = expected["answer"]
+ content_lower = accumulated_content.lower()
+ assert any(ans.lower() in content_lower for ans in expected_answers), (
+ f"Expected one of {expected_answers} in content, but got: '{accumulated_content}'"
+ )
+
+
# --- Helper functions (structured output validation) ---
@@ -324,3 +576,47 @@ def validate_structured_output(maybe_json_content: str, schema_name: str) -> Non
assert len(structured_output.participants) == 2
elif schema_name == "valid_math_reasoning":
assert len(structured_output.final_answer) > 0
+
+
+def _accumulate_streaming_tool_calls(stream):
+ """Accumulates tool calls and content from a streaming ChatCompletion response."""
+ tool_calls_buffer = {}
+ current_id = None
+ full_content = "" # Initialize content accumulator
+ # Process streaming chunks
+ for chunk in stream:
+ choice = chunk.choices[0]
+ delta = choice.delta
+
+ # Accumulate content
+ if delta.content:
+ full_content += delta.content
+
+ if delta.tool_calls is None:
+ continue
+
+ for tool_call_delta in delta.tool_calls:
+ if tool_call_delta.id:
+ current_id = tool_call_delta.id
+ call_id = current_id
+ # Skip if no ID seen yet for this tool call delta
+ if not call_id:
+ continue
+ func_delta = tool_call_delta.function
+
+ if call_id not in tool_calls_buffer:
+ tool_calls_buffer[call_id] = {
+ "id": call_id,
+ "type": "function", # Assume function type
+ "function": {"name": None, "arguments": ""}, # Nested structure
+ }
+
+ # Accumulate name and arguments into the nested function dict
+ if func_delta:
+ if func_delta.name:
+ tool_calls_buffer[call_id]["function"]["name"] = func_delta.name
+ if func_delta.arguments:
+ tool_calls_buffer[call_id]["function"]["arguments"] += func_delta.arguments
+
+ # Return content and tool calls as a list
+ return full_content, list(tool_calls_buffer.values())
diff --git a/tests/verifications/test_results/fireworks.json b/tests/verifications/test_results/fireworks.json
index 061e44c08..1fb6cb1b4 100644
--- a/tests/verifications/test_results/fireworks.json
+++ b/tests/verifications/test_results/fireworks.json
@@ -1,15 +1,15 @@
{
- "created": 1744328795.171092,
- "duration": 107.57908606529236,
+ "created": 1744679294.344288,
+ "duration": 243.49469900131226,
"exitcode": 1,
"root": "/Users/erichuang/projects/llama-stack",
"environment": {},
"summary": {
- "passed": 28,
+ "passed": 36,
"skipped": 2,
- "failed": 6,
- "total": 36,
- "collected": 36
+ "failed": 40,
+ "total": 78,
+ "collected": 78
},
"collectors": [
{
@@ -29,182 +29,392 @@
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"type": "Function",
- "lineno": 116
+ "lineno": 117
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"type": "Function",
- "lineno": 116
+ "lineno": 117
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"type": "Function",
- "lineno": 116
+ "lineno": 117
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"type": "Function",
- "lineno": 135
+ "lineno": 136
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"type": "Function",
- "lineno": 135
+ "lineno": 136
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"type": "Function",
- "lineno": 135
+ "lineno": 136
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"type": "Function",
- "lineno": 204
+ "lineno": 205
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"type": "Function",
- "lineno": 204
+ "lineno": 205
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"type": "Function",
- "lineno": 204
+ "lineno": 205
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"type": "Function",
- "lineno": 228
+ "lineno": 229
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"type": "Function",
- "lineno": 228
+ "lineno": 229
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"type": "Function",
- "lineno": 228
+ "lineno": 229
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
+ "type": "Function",
+ "lineno": 257
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
+ "type": "Function",
+ "lineno": 257
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
+ "type": "Function",
+ "lineno": 257
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
+ "type": "Function",
+ "lineno": 281
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
+ "type": "Function",
+ "lineno": 281
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
+ "type": "Function",
+ "lineno": 281
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
+ "type": "Function",
+ "lineno": 308
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
+ "type": "Function",
+ "lineno": 308
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
+ "type": "Function",
+ "lineno": 308
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
+ "type": "Function",
+ "lineno": 331
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
+ "type": "Function",
+ "lineno": 331
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
+ "type": "Function",
+ "lineno": 331
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 450
}
]
}
@@ -212,7 +422,7 @@
"tests": [
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
@@ -231,21 +441,21 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.2175025000469759,
+ "duration": 0.2540216660127044,
"outcome": "passed"
},
"call": {
- "duration": 0.7433859170414507,
+ "duration": 0.6861197501420975,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0001592918997630477,
+ "duration": 0.00015208404511213303,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
@@ -264,21 +474,21 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.007383499993011355,
+ "duration": 0.006722707999870181,
"outcome": "passed"
},
"call": {
- "duration": 0.5949292909353971,
+ "duration": 0.5997684169560671,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00015891704242676497,
+ "duration": 0.0002298750914633274,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
@@ -297,21 +507,21 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.010730999987572432,
+ "duration": 0.015468083089217544,
"outcome": "passed"
},
"call": {
- "duration": 0.8945954169612378,
+ "duration": 0.4625723329372704,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0003751249751076102,
+ "duration": 0.0003302919212728739,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
@@ -330,21 +540,21 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.01665666699409485,
+ "duration": 0.014780875062569976,
"outcome": "passed"
},
"call": {
- "duration": 0.907927209045738,
+ "duration": 0.4616922920104116,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00024874997325241566,
+ "duration": 0.0004110001027584076,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]",
@@ -363,21 +573,21 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.01039199996739626,
+ "duration": 0.016551292035728693,
"outcome": "passed"
},
"call": {
- "duration": 0.5971567500382662,
+ "duration": 0.9366653750184923,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0003488330403342843,
+ "duration": 0.00045104208402335644,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]",
@@ -396,21 +606,21 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.018627874902449548,
+ "duration": 0.043513541808351874,
"outcome": "passed"
},
"call": {
- "duration": 2.0586736251134425,
+ "duration": 0.5119727500714362,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00046974990982562304,
+ "duration": 0.00016754190437495708,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
@@ -429,21 +639,21 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.01706262503284961,
+ "duration": 0.008419709047302604,
"outcome": "passed"
},
"call": {
- "duration": 0.6679969580145553,
+ "duration": 0.7933078748174012,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0004670419730246067,
+ "duration": 0.00016583292745053768,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
@@ -462,21 +672,21 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.025956374942325056,
+ "duration": 0.013550583040341735,
"outcome": "passed"
},
"call": {
- "duration": 2.052679874934256,
+ "duration": 0.6633435001131147,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00026958296075463295,
+ "duration": 0.00023925001733005047,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
@@ -495,21 +705,21 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.015856957994401455,
+ "duration": 0.007293834118172526,
"outcome": "passed"
},
"call": {
- "duration": 0.3096678329166025,
+ "duration": 0.5193503750488162,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0007620420074090362,
+ "duration": 0.00018516601994633675,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
@@ -528,21 +738,21 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.013509334065020084,
+ "duration": 0.009030540939420462,
"outcome": "passed"
},
"call": {
- "duration": 0.5914681670255959,
+ "duration": 0.4338789170142263,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0002906669396907091,
+ "duration": 0.0004670829512178898,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]",
@@ -561,21 +771,21 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.013216375024057925,
+ "duration": 0.01854533306322992,
"outcome": "passed"
},
"call": {
- "duration": 1.8804527079919353,
+ "duration": 1.0042304168455303,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0002026669681072235,
+ "duration": 0.0004844998475164175,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]",
@@ -594,21 +804,21 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.00827441702131182,
+ "duration": 0.018001709133386612,
"outcome": "passed"
},
"call": {
- "duration": 0.7407040420221165,
+ "duration": 0.5567380839493126,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0005084159784018993,
+ "duration": 0.00015412503853440285,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
- "lineno": 116,
+ "lineno": 117,
"outcome": "skipped",
"keywords": [
"test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
@@ -627,22 +837,22 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.012424499960616231,
+ "duration": 0.008420375175774097,
"outcome": "passed"
},
"call": {
- "duration": 0.00032762496266514063,
+ "duration": 0.00015591713599860668,
"outcome": "skipped",
- "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 125, 'Skipped: Skipping test_chat_non_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')"
+ "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 126, 'Skipped: Skipping test_chat_non_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')"
},
"teardown": {
- "duration": 0.00032416603062301874,
+ "duration": 0.0001371251419186592,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
- "lineno": 116,
+ "lineno": 117,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
@@ -661,21 +871,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.02253958396613598,
+ "duration": 0.00672045792452991,
"outcome": "passed"
},
"call": {
- "duration": 2.64042466704268,
+ "duration": 1.790064417058602,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0003636250039562583,
+ "duration": 0.0004657919052988291,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
- "lineno": 116,
+ "lineno": 117,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
@@ -694,21 +904,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.014634749968536198,
+ "duration": 0.015534916892647743,
"outcome": "passed"
},
"call": {
- "duration": 5.126485540997237,
+ "duration": 3.2250108749140054,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0002988330088555813,
+ "duration": 0.00038420804776251316,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
- "lineno": 135,
+ "lineno": 136,
"outcome": "skipped",
"keywords": [
"test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
@@ -727,22 +937,22 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.015854416065849364,
+ "duration": 0.03246337501332164,
"outcome": "passed"
},
"call": {
- "duration": 0.00038058299105614424,
+ "duration": 0.0005176670383661985,
"outcome": "skipped",
- "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 144, 'Skipped: Skipping test_chat_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')"
+ "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 145, 'Skipped: Skipping test_chat_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')"
},
"teardown": {
- "duration": 0.0002689170651137829,
+ "duration": 0.0002715419977903366,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
- "lineno": 135,
+ "lineno": 136,
"outcome": "passed",
"keywords": [
"test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
@@ -761,21 +971,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.011205915943719447,
+ "duration": 0.12475762516260147,
"outcome": "passed"
},
"call": {
- "duration": 3.2596546669956297,
+ "duration": 4.934706958010793,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0006222500232979655,
+ "duration": 0.00027604191564023495,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
- "lineno": 135,
+ "lineno": 136,
"outcome": "passed",
"keywords": [
"test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
@@ -794,21 +1004,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.016557667055167258,
+ "duration": 0.01025745808146894,
"outcome": "passed"
},
"call": {
- "duration": 4.930164708988741,
+ "duration": 3.5653172079473734,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00048687495291233063,
+ "duration": 0.0005323749501258135,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]",
@@ -827,21 +1037,21 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.00886166701093316,
+ "duration": 0.0553184999153018,
"outcome": "passed"
},
"call": {
- "duration": 0.8833738330285996,
+ "duration": 1.366144834086299,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00025583396200090647,
+ "duration": 0.00042316620238125324,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]",
@@ -860,21 +1070,21 @@
"case_id": "math"
},
"setup": {
- "duration": 0.01297520799562335,
+ "duration": 0.06981937494128942,
"outcome": "passed"
},
"call": {
- "duration": 1.9960687910206616,
+ "duration": 2.829931082902476,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0005048330640420318,
+ "duration": 0.0003029161598533392,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]",
@@ -893,21 +1103,21 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.007275875075720251,
+ "duration": 0.0244335001334548,
"outcome": "passed"
},
"call": {
- "duration": 0.9094266659813002,
+ "duration": 0.7541109579615295,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00028041598852723837,
+ "duration": 0.0004666249733418226,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]",
@@ -926,21 +1136,21 @@
"case_id": "math"
},
"setup": {
- "duration": 0.008899332955479622,
+ "duration": 0.016700832871720195,
"outcome": "passed"
},
"call": {
- "duration": 3.117967874975875,
+ "duration": 2.208378749899566,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00017600005958229303,
+ "duration": 0.00016137491911649704,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]",
@@ -959,21 +1169,21 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.0073364999843761325,
+ "duration": 0.006982124876230955,
"outcome": "passed"
},
"call": {
- "duration": 2.2714374579954892,
+ "duration": 0.6431179158389568,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0001814159331843257,
+ "duration": 0.00033412501215934753,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]",
@@ -992,21 +1202,21 @@
"case_id": "math"
},
"setup": {
- "duration": 0.010546459001488984,
+ "duration": 0.015676999930292368,
"outcome": "passed"
},
"call": {
- "duration": 3.9954450000077486,
+ "duration": 4.404933541081846,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0002719159238040447,
+ "duration": 0.0002617498394101858,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]",
@@ -1025,21 +1235,21 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.012508000014349818,
+ "duration": 0.07572970795445144,
"outcome": "passed"
},
"call": {
- "duration": 9.095425167004578,
+ "duration": 1.1367775409016758,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00029200001154094934,
+ "duration": 0.0006681671366095543,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]",
@@ -1058,21 +1268,21 @@
"case_id": "math"
},
"setup": {
- "duration": 0.014769250061362982,
+ "duration": 0.028525790898129344,
"outcome": "passed"
},
"call": {
- "duration": 1.9875252910424024,
+ "duration": 2.1424834579229355,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0006288329605013132,
+ "duration": 0.0003642500378191471,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]",
@@ -1091,21 +1301,21 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.014440709026530385,
+ "duration": 0.0146782910451293,
"outcome": "passed"
},
"call": {
- "duration": 1.2613736250204965,
+ "duration": 15.13383225002326,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0001937919296324253,
+ "duration": 0.00045950012281537056,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]",
@@ -1124,21 +1334,21 @@
"case_id": "math"
},
"setup": {
- "duration": 0.0071510839043185115,
+ "duration": 0.01714799995534122,
"outcome": "passed"
},
"call": {
- "duration": 2.2953888749470934,
+ "duration": 10.714752790983766,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00016245793085545301,
+ "duration": 0.00027029216289520264,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]",
@@ -1157,21 +1367,21 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.007294666953384876,
+ "duration": 0.010765291983261704,
"outcome": "passed"
},
"call": {
- "duration": 2.194703874993138,
+ "duration": 0.6682700838427991,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00017604196909815073,
+ "duration": 0.00015808409079909325,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]",
@@ -1190,21 +1400,21 @@
"case_id": "math"
},
"setup": {
- "duration": 0.019950625021010637,
+ "duration": 0.0071080829948186874,
"outcome": "passed"
},
"call": {
- "duration": 8.4994609169662,
+ "duration": 1.9725822920445353,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00026404205709695816,
+ "duration": 0.0004201668780297041,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
- "lineno": 204,
+ "lineno": 205,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
@@ -1223,34 +1433,34 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.011928000021725893,
+ "duration": 0.013940333155915141,
"outcome": "passed"
},
"call": {
- "duration": 0.5664792089955881,
+ "duration": 0.5732313331682235,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 223,
+ "lineno": 224,
"message": "TypeError: object of type 'NoneType' has no len()"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 223,
+ "lineno": 224,
"message": "TypeError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:223: TypeError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:224: TypeError"
},
"teardown": {
- "duration": 0.00023799994960427284,
+ "duration": 0.00022962503135204315,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
- "lineno": 204,
+ "lineno": 205,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
@@ -1269,34 +1479,34 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.006813624990172684,
+ "duration": 0.006374292075634003,
"outcome": "passed"
},
"call": {
- "duration": 3.170418416033499,
+ "duration": 7.2776273330673575,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 223,
+ "lineno": 224,
"message": "TypeError: object of type 'NoneType' has no len()"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 223,
+ "lineno": 224,
"message": "TypeError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:223: TypeError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:224: TypeError"
},
"teardown": {
- "duration": 0.0004129580920562148,
+ "duration": 0.0004100420046597719,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
- "lineno": 204,
+ "lineno": 205,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
@@ -1315,34 +1525,34 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.01656208303757012,
+ "duration": 0.012761292047798634,
"outcome": "passed"
},
"call": {
- "duration": 22.76337137504015,
+ "duration": 0.8920639578718692,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 223,
+ "lineno": 224,
"message": "TypeError: object of type 'NoneType' has no len()"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 223,
+ "lineno": 224,
"message": "TypeError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:223: TypeError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:224: TypeError"
},
"teardown": {
- "duration": 0.00038704206235706806,
+ "duration": 0.0004124999977648258,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
- "lineno": 228,
+ "lineno": 229,
"outcome": "failed",
"keywords": [
"test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
@@ -1361,34 +1571,34 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.015727541991509497,
+ "duration": 0.013205124996602535,
"outcome": "passed"
},
"call": {
- "duration": 0.5719050420448184,
+ "duration": 1.930448625003919,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 274,
- "message": "assert 0 == 1\n + where 0 = len({})"
+ "lineno": 248,
+ "message": "assert 0 == 1\n + where 0 = len([])"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 274,
+ "lineno": 248,
"message": "AssertionError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n choice = chunk.choices[0]\n delta = choice.delta\n \n if delta.tool_calls is None:\n continue\n \n for tool_call_delta in delta.tool_calls:\n if tool_call_delta.id:\n current_id = tool_call_delta.id\n call_id = current_id\n func_delta = tool_call_delta.function\n \n if call_id not in tool_calls_buffer:\n tool_calls_buffer[call_id] = {\n \"id\": call_id,\n \"type\": tool_call_delta.type,\n \"name\": func_delta.name,\n \"arguments\": \"\",\n }\n \n if func_delta.arguments:\n tool_calls_buffer[call_id][\"arguments\"] += func_delta.arguments\n \n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len({})\n\ntests/verifications/openai_api/test_chat_completion.py:274: AssertionError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:248: AssertionError"
},
"teardown": {
- "duration": 0.0003532909322530031,
+ "duration": 0.0005771249998360872,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
- "lineno": 228,
+ "lineno": 229,
"outcome": "failed",
"keywords": [
"test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
@@ -1407,34 +1617,34 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.011914041941054165,
+ "duration": 0.01408083294518292,
"outcome": "passed"
},
"call": {
- "duration": 5.403063916950487,
+ "duration": 10.029349042102695,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 274,
- "message": "assert 0 == 1\n + where 0 = len({})"
+ "lineno": 248,
+ "message": "assert 0 == 1\n + where 0 = len([])"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 274,
+ "lineno": 248,
"message": "AssertionError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n choice = chunk.choices[0]\n delta = choice.delta\n \n if delta.tool_calls is None:\n continue\n \n for tool_call_delta in delta.tool_calls:\n if tool_call_delta.id:\n current_id = tool_call_delta.id\n call_id = current_id\n func_delta = tool_call_delta.function\n \n if call_id not in tool_calls_buffer:\n tool_calls_buffer[call_id] = {\n \"id\": call_id,\n \"type\": tool_call_delta.type,\n \"name\": func_delta.name,\n \"arguments\": \"\",\n }\n \n if func_delta.arguments:\n tool_calls_buffer[call_id][\"arguments\"] += func_delta.arguments\n \n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len({})\n\ntests/verifications/openai_api/test_chat_completion.py:274: AssertionError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:248: AssertionError"
},
"teardown": {
- "duration": 0.0005193749675527215,
+ "duration": 0.0004449589177966118,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
- "lineno": 228,
+ "lineno": 229,
"outcome": "failed",
"keywords": [
"test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
@@ -1453,31 +1663,1859 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.012608832912519574,
+ "duration": 0.013213291997089982,
"outcome": "passed"
},
"call": {
- "duration": 7.587262416025624,
+ "duration": 8.608150291023776,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 274,
- "message": "assert 0 == 1\n + where 0 = len({})"
+ "lineno": 248,
+ "message": "assert 0 == 1\n + where 0 = len([])"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 274,
+ "lineno": 248,
"message": "AssertionError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n choice = chunk.choices[0]\n delta = choice.delta\n \n if delta.tool_calls is None:\n continue\n \n for tool_call_delta in delta.tool_calls:\n if tool_call_delta.id:\n current_id = tool_call_delta.id\n call_id = current_id\n func_delta = tool_call_delta.function\n \n if call_id not in tool_calls_buffer:\n tool_calls_buffer[call_id] = {\n \"id\": call_id,\n \"type\": tool_call_delta.type,\n \"name\": func_delta.name,\n \"arguments\": \"\",\n }\n \n if func_delta.arguments:\n tool_calls_buffer[call_id][\"arguments\"] += func_delta.arguments\n \n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len({})\n\ntests/verifications/openai_api/test_chat_completion.py:274: AssertionError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:248: AssertionError"
},
"teardown": {
- "duration": 0.0008685829816386104,
+ "duration": 0.0005860829260200262,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
+ "lineno": 257,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama-v3p3-70b-instruct-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.01437820796854794,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.7105170420836657,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00017283298075199127,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
+ "lineno": 257,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-scout-instruct-basic-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-scout-instruct-basic",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.009220415959134698,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 5.718667333945632,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 277,
+ "message": "TypeError: object of type 'NoneType' has no len()"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 277,
+ "message": "TypeError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0, \"Expected tool call when tool_choice='required'\"\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:277: TypeError"
+ },
+ "teardown": {
+ "duration": 0.0003282078541815281,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
+ "lineno": 257,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-maverick-instruct-basic-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.014709000010043383,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.7260455000214279,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 277,
+ "message": "TypeError: object of type 'NoneType' has no len()"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 277,
+ "message": "TypeError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0, \"Expected tool call when tool_choice='required'\"\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:277: TypeError"
+ },
+ "teardown": {
+ "duration": 0.00022012507542967796,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
+ "lineno": 281,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama-v3p3-70b-instruct-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.008183792000636458,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.9683502500411123,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0007690000347793102,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
+ "lineno": 281,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-scout-instruct-basic-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-scout-instruct-basic",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.014906208030879498,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 11.76459054206498,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 302,
+ "message": "AssertionError: Expected tool call when tool_choice='required'\nassert 0 > 0\n + where 0 = len([])"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 302,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n \n> assert len(tool_calls_buffer) > 0, \"Expected tool call when tool_choice='required'\"\nE AssertionError: Expected tool call when tool_choice='required'\nE assert 0 > 0\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:302: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0003086249344050884,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
+ "lineno": 281,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-maverick-instruct-basic-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.021144041791558266,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 2.4300453749019653,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 302,
+ "message": "AssertionError: Expected tool call when tool_choice='required'\nassert 0 > 0\n + where 0 = len([])"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 302,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n \n> assert len(tool_calls_buffer) > 0, \"Expected tool call when tool_choice='required'\"\nE AssertionError: Expected tool call when tool_choice='required'\nE assert 0 > 0\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:302: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00037800008431077003,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
+ "lineno": 308,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama-v3p3-70b-instruct-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.007929167011752725,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.0130669160280377,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0004307499621063471,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
+ "lineno": 308,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-scout-instruct-basic-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-scout-instruct-basic",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.010822792071849108,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 4.663267957977951,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0006220841314643621,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
+ "lineno": 308,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-maverick-instruct-basic-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.010691167088225484,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 3.383276625070721,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00047616707161068916,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
+ "lineno": 331,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama-v3p3-70b-instruct-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.030178457964211702,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.4668415829073638,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0007963338866829872,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
+ "lineno": 331,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-scout-instruct-basic-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-scout-instruct-basic",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.011727249948307872,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 11.540696125011891,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0009242501109838486,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
+ "lineno": 331,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-maverick-instruct-basic-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.008536209119483829,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 3.6622679999563843,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0005495408549904823,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.017524708062410355,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.625571500044316,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 446,
+ "message": "AssertionError: Expected one of ['sol'] in content, but got: 'I am not able to execute this task as it exceeds the limitations of the functions I have been given.'\nassert False\n + where False = any(. at 0x1073e5cb0>)"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 446,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'I am not able to execute this task as it exceeds the limitations of the functions I have been given.'\nE assert False\nE + where False = any(. at 0x1073e5cb0>)\n\ntests/verifications/openai_api/test_chat_completion.py:446: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00044062500819563866,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.01056775008328259,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.5624969999771565,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0004401658661663532,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.013444249983876944,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.8705885419622064,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": \"19.99\", \"inStock\": \"true\", \"tags\": \"[\\\\\"new\\\\\", \\\\\"sale\\\\\"]\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": \"19.99\", \"inStock\": \"true\", \"tags\": \"[\\\\\"new\\\\\", \\\\\"sale\\\\\"]\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0004647918976843357,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.013817500090226531,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.6882082498632371,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0005112909711897373,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.013548000017181039,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.5821714580524713,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": \"1\", \"year\": \"2025\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": \"1\", \"year\": \"2025\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00021225004456937313,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-scout-instruct-basic",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.0070156671572476625,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 8.95718324999325,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='```\\n{\\n \"name\": \"get_weather\",\\n \"parameters\": {\\n \"description\": \"Get the current weather\",\\n \"parameters\": {\\n \"location\": {\\n \"description\": \"The city and state (both required)\",\\n \"type\": \"object\",\\n \"properties\": {\\n \"location\": {\\n \"description\": \"The city and state, e.g. San Francisco, CA.\",\\n \"type\": \"string\"\\n }\\n }\\n }\\n },\\n \"type\": \"object\",\\n \"properties\": {\\n \"location\": \"San Francisco, CA.\"\\n }\\n }\\n}\\n```', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='```\\n{\\n \"name\": \"get_weather\",\\n \"parameters\": {\\n \"description\": \"Get the current weather\",\\n \"parameters\": {\\n \"location\": {\\n \"description\": \"The city and state (both required)\",\\n \"type\": \"object\",\\n \"properties\": {\\n \"location\": {\\n \"description\": \"The city and state, e.g. 
San Francisco, CA.\",\\n \"type\": \"string\"\\n }\\n }\\n }\\n },\\n \"type\": \"object\",\\n \"properties\": {\\n \"location\": \"San Francisco, CA.\"\\n }\\n }\\n}\\n```', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00045741605572402477,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-scout-instruct-basic",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.011042665923014283,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 3.372867708094418,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required)\", \"type\": \"string\"}}}, \"required\": [\"location\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required)\", \"type\": \"string\"}}}, \"required\": [\"location\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00042333384044468403,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-scout-instruct-basic",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.01305404189042747,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 3.5883425418287516,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"description\": \"Add a new product\", \"type\": \"object\", \"properties\": {\"name\": {\"description\": \"Name of the product\", \"type\": \"string\"}, \"price\": {\"description\": \"Price of the product\", \"type\": \"number\"}, \"inStock\": {\"description\": \"Availability status of the product\", \"type\": \"boolean\"}, \"tags\": {\"description\": \"List of product tags\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}}, \"required\": [\"name\", \"price\", \"inStock\", \"tags\"]}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"description\": \"Add a new product\", \"type\": \"object\", \"properties\": {\"name\": {\"description\": \"Name of the product\", \"type\": \"string\"}, \"price\": {\"description\": \"Price of the product\", \"type\": \"number\"}, \"inStock\": {\"description\": \"Availability status of the product\", \"type\": \"boolean\"}, \"tags\": {\"description\": \"List of product tags\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}}, \"required\": [\"name\", \"price\", \"inStock\", \"tags\"]}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0005818749777972698,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-scout-instruct-basic",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.01428320910781622,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 15.402638916159049,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event...: \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event...: \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in 
ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0004401251208037138,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-scout-instruct-basic",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.021037542028352618,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 6.548705333843827,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\", \"value\": 1}, \"year\": {\"description\": \"Year\", \"type\": \"integer\", \"value\": 2025}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\", \"value\": 1}, \"year\": {\"description\": \"Year\", \"type\": \"integer\", \"value\": 2025}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00035033305175602436,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.00768870790489018,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 3.410787041997537,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='To answer the question about the weather in San Francisco, we can directly utilize the provided function `get_weather` as it matches the context of the query.\\n\\nThe function `get_weather` requires a `location` parameter. Given that San Francisco is a city and assuming California (CA) is the state, we can directly fit the query into the provided function format.\\n\\nHere\\'s the response in the required JSON format:\\n\\n```json\\n{\\n \"name\": \"get_weather\",\\n \"parameters\": {\\n \"location\": \"San Francisco, CA\"\\n }\\n}\\n```', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='To answer the question about the weather in San Francisco, we can directly utilize the provided function `get_weather` as it matches the context of the query.\\n\\nThe function `get_weather` requires a `location` parameter. 
Given that San Francisco is a city and assuming California (CA) is the state, we can directly fit the query into the provided function format.\\n\\nHere\\'s the response in the required JSON format:\\n\\n```json\\n{\\n \"name\": \"get_weather\",\\n \"parameters\": {\\n \"location\": \"San Francisco, CA\"\\n }\\n}\\n```', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0002946250606328249,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.009200166910886765,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.5177558751311153,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00025020912289619446,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.007124624913558364,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.6132153749931604,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": 19.99, \"inStock\": true, \"tags\": [\"new\", \"sale\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": 19.99, \"inStock\": true, \"tags\": [\"new\", \"sale\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0003745418507605791,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.01410404103808105,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.3956649999599904,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}<|uniquepaddingtoken557|>---\"\"\"\"\"\"\"\"\"---\" \" \" \"\"\" \" \" \"Interaction\"\"\\n\\nI am unable to execute this task as it exceeds the limitations of the functions I have at hand.\"', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}<|uniquepaddingtoken557|>---\"\"\"\"\"\"\"\"\"---\" \" \" \"\"\" \" \" \"Interaction\"\"\\n\\nI am unable to execute this task as it exceeds the limitations of the functions I have at hand.\"', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00041033304296433926,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.027331124991178513,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 2.465563999954611,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"\\n\\nThe provided JSON describes a function `getMonthlyExpenseSummary` that takes two parameters: `month` and `year`. The prompt asks for the monthly expense in January of this year. Assuming the current year is 2024, we can fill in the appropriate values for `month` and `year`.\\n\\nThe value for `month` should be `1` (January is the first month), and the value for `year` should be `2024`.\\n\\nTherefore, the appropriate function call with its arguments is:assistant\\n\\nimport datetime\\n\\n# Get the current year\\ncurrent_year = datetime.datetime.now().year\\n\\n# The function call with its arguments\\nprint({\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": current_year}})\"{\\\\\"name\\\\\": \\\\\"getMonthlyExpenseSummary\\\\\", \\\\\"parameters\\\\\": {\\\\\"month\\\\\": 1, \\\\\"year\\\\\": 2024}}\"assistant\\n\\nThe final response is: {\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"\\n\\nThe provided JSON describes a function `getMonthlyExpenseSummary` that takes two parameters: `month` and `year`. The prompt asks for the monthly expense in January of this year. 
Assuming the current year is 2024, we can fill in the appropriate values for `month` and `year`.\\n\\nThe value for `month` should be `1` (January is the first month), and the value for `year` should be `2024`.\\n\\nTherefore, the appropriate function call with its arguments is:assistant\\n\\nimport datetime\\n\\n# Get the current year\\ncurrent_year = datetime.datetime.now().year\\n\\n# The function call with its arguments\\nprint({\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": current_year}})\"{\\\\\"name\\\\\": \\\\\"getMonthlyExpenseSummary\\\\\", \\\\\"parameters\\\\\": {\\\\\"month\\\\\": 1, \\\\\"year\\\\\": 2024}}\"assistant\\n\\nThe final response is: {\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0005783340893685818,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.016343542141839862,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.6930254579056054,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 529,
+ "message": "AssertionError: Expected one of ['sol'] in content, but got: 'I cannot accomplish this task as it requires capabilities beyond those offered by the provided functions.'\nassert False\n + where False = any(. at 0x10738e0a0>)"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 529,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but 
none received.\"\n expected_answers = expected[\"answer\"]\n content_lower = accumulated_content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{accumulated_content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'I cannot accomplish this task as it requires capabilities beyond those offered by the provided functions.'\nE assert False\nE + where False = any(. at 0x10738e0a0>)\n\ntests/verifications/openai_api/test_chat_completion.py:529: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00024741701781749725,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.007791666081175208,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.4420052089262754,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.000628374982625246,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.013015333097428083,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.6754761249758303,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.000581083819270134,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.0128930420614779,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.367436750093475,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00024812505580484867,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.006677915807813406,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.5142939588986337,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0002248329110443592,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-scout-instruct-basic",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.008392333984375,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 9.519045708002523,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00019570882432162762,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-scout-instruct-basic",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.009688499849289656,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.9869634578935802,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0002135841641575098,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-scout-instruct-basic",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.007028624881058931,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 4.688094082986936,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00026954198256134987,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-scout-instruct-basic",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.006646708119660616,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 15.899775499943644,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0004787910729646683,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-scout-instruct-basic",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.016487207962200046,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 3.922360667027533,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00043979217298328876,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.013401374919340014,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 2.2223200001753867,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 529,
+ "message": "AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"location\": \"Rome, Italy\"}} is not the best response here.\n \n Since we don't have a function that directly answers \"What's the name of the Sun in latin?\", a more appropriate response would be to say that there's no function available to answer this question. However, to follow the given format and assuming there's an implicit expectation to still attempt an answer or provide a closest match:\n \n {\"name\": \"get_weather\", \"parameters\": {\"location\": \"Invalid input, no relation to weather\"}} is still not a valid response.\n \n A correct response according to the given constraints isn't feasible. However, to fit the required format and indicating a function that could be related or a default, if there was a \"get_fact\" function:\n \n {\"name\": \"get_fact\", \"parameters\": {\"query\": \"Latin name of the Sun\"}} \n \n But since \"get_fact\" isn't defined in the prompt, and sticking strictly to the given function:\n \n There isn't a proper function to call.\n \n For the sake of compliance, let's assume an unrelated function was to be used due to lack of information.\n \n The best course of action is to indicate that the provided function definitions don't directly support answering the question about the Latin name of the Sun.'\nassert False\n + where False = any(. at 0x1074b9bd0>)"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 529,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert accumulated_content is not None and accumulated_content != \"\", \"Expected 
content, but none received.\"\n expected_answers = expected[\"answer\"]\n content_lower = accumulated_content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{accumulated_content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"location\": \"Rome, Italy\"}} is not the best response here.\nE \nE Since we don't have a function that directly answers \"What's the name of the Sun in latin?\", a more appropriate response would be to say that there's no function available to answer this question. However, to follow the given format and assuming there's an implicit expectation to still attempt an answer or provide a closest match:\nE \nE {\"name\": \"get_weather\", \"parameters\": {\"location\": \"Invalid input, no relation to weather\"}} is still not a valid response.\nE \nE A correct response according to the given constraints isn't feasible. However, to fit the required format and indicating a function that could be related or a default, if there was a \"get_fact\" function:\nE \nE {\"name\": \"get_fact\", \"parameters\": {\"query\": \"Latin name of the Sun\"}} \nE \nE But since \"get_fact\" isn't defined in the prompt, and sticking strictly to the given function:\nE \nE There isn't a proper function to call.\nE \nE For the sake of compliance, let's assume an unrelated function was to be used due to lack of information.\nE \nE The best course of action is to indicate that the provided function definitions don't directly support answering the question about the Latin name of the Sun.'\nE assert False\nE + where False = any(. at 0x1074b9bd0>)\n\ntests/verifications/openai_api/test_chat_completion.py:529: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00047154095955193043,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.01485933386720717,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.6193458330817521,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.000300833024084568,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.012684250017628074,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.5173197500407696,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00047266692854464054,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.01282945810817182,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 2.990155333885923,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00027558300644159317,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.008087666006758809,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 3.6024099169299006,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0010035419836640358,
"outcome": "passed"
}
}
],
- "run_timestamp": 1744328684
+ "run_timestamp": 1744679046
}
diff --git a/tests/verifications/test_results/openai.json b/tests/verifications/test_results/openai.json
index 0c1892f7e..32a2a2b82 100644
--- a/tests/verifications/test_results/openai.json
+++ b/tests/verifications/test_results/openai.json
@@ -1,13 +1,13 @@
{
- "created": 1744328898.0248861,
- "duration": 47.561042070388794,
+ "created": 1744679497.440863,
+ "duration": 102.70424389839172,
"exitcode": 0,
"root": "/Users/erichuang/projects/llama-stack",
"environment": {},
"summary": {
- "passed": 24,
- "total": 24,
- "collected": 24
+ "passed": 52,
+ "total": 52,
+ "collected": 52
},
"collectors": [
{
@@ -27,122 +27,262 @@
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]",
"type": "Function",
- "lineno": 116
+ "lineno": 117
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]",
"type": "Function",
- "lineno": 116
+ "lineno": 117
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]",
"type": "Function",
- "lineno": 135
+ "lineno": 136
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]",
"type": "Function",
- "lineno": 135
+ "lineno": 136
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]",
"type": "Function",
- "lineno": 204
+ "lineno": 205
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]",
"type": "Function",
- "lineno": 204
+ "lineno": 205
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-case0]",
"type": "Function",
- "lineno": 228
+ "lineno": 229
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-mini-case0]",
"type": "Function",
- "lineno": 228
+ "lineno": 229
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[gpt-4o-case0]",
+ "type": "Function",
+ "lineno": 257
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[gpt-4o-mini-case0]",
+ "type": "Function",
+ "lineno": 257
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[gpt-4o-case0]",
+ "type": "Function",
+ "lineno": 281
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[gpt-4o-mini-case0]",
+ "type": "Function",
+ "lineno": 281
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[gpt-4o-case0]",
+ "type": "Function",
+ "lineno": 308
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[gpt-4o-mini-case0]",
+ "type": "Function",
+ "lineno": 308
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[gpt-4o-case0]",
+ "type": "Function",
+ "lineno": 331
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[gpt-4o-mini-case0]",
+ "type": "Function",
+ "lineno": 331
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 450
}
]
}
@@ -150,7 +290,7 @@
"tests": [
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[gpt-4o-earth]",
@@ -169,21 +309,21 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.0694252080284059,
+ "duration": 0.09044458298012614,
"outcome": "passed"
},
"call": {
- "duration": 0.5709165419684723,
+ "duration": 1.3071064590476453,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0007626248989254236,
+ "duration": 0.0003990421537309885,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[gpt-4o-saturn]",
@@ -202,21 +342,21 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.010281750001013279,
+ "duration": 0.015266708098351955,
"outcome": "passed"
},
"call": {
- "duration": 0.6309260830748826,
+ "duration": 1.3942135840188712,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0001824579667299986,
+ "duration": 0.0006840829737484455,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[gpt-4o-mini-earth]",
@@ -235,21 +375,21 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.007922374992631376,
+ "duration": 0.028802334098145366,
"outcome": "passed"
},
"call": {
- "duration": 0.31756504194345325,
+ "duration": 0.40633770800195634,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0005268750246614218,
+ "duration": 0.0006945421919226646,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[gpt-4o-mini-saturn]",
@@ -268,21 +408,21 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.01643404201604426,
+ "duration": 0.01865937514230609,
"outcome": "passed"
},
"call": {
- "duration": 0.7479908330133185,
+ "duration": 0.7515070410445333,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0004037501057609916,
+ "duration": 0.0002985831815749407,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[gpt-4o-earth]",
@@ -301,21 +441,21 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.021671707974746823,
+ "duration": 0.011108374921604991,
"outcome": "passed"
},
"call": {
- "duration": 0.6701172919711098,
+ "duration": 0.3914629169739783,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0005569590721279383,
+ "duration": 0.0006979589816182852,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[gpt-4o-saturn]",
@@ -334,21 +474,21 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.015847125090658665,
+ "duration": 0.02875337516888976,
"outcome": "passed"
},
"call": {
- "duration": 0.636536999954842,
+ "duration": 0.5632798750884831,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00029395800083875656,
+ "duration": 0.004012458026409149,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[gpt-4o-mini-earth]",
@@ -367,21 +507,21 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.011792832985520363,
+ "duration": 0.0143584581092,
"outcome": "passed"
},
"call": {
- "duration": 0.5610962919890881,
+ "duration": 0.36101250001229346,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0003578749019652605,
+ "duration": 0.0005384159740060568,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[gpt-4o-mini-saturn]",
@@ -400,21 +540,21 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.016500207944773138,
+ "duration": 0.017127499915659428,
"outcome": "passed"
},
"call": {
- "duration": 0.8060244580265135,
+ "duration": 0.8120857500471175,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0005296670133247972,
+ "duration": 0.0005928750615566969,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]",
- "lineno": 116,
+ "lineno": 117,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_image[gpt-4o-case0]",
@@ -433,21 +573,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.008338792016729712,
+ "duration": 0.023183667100965977,
"outcome": "passed"
},
"call": {
- "duration": 7.009252917021513,
+ "duration": 2.8612758750095963,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0003042910248041153,
+ "duration": 0.0005042918492108583,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]",
- "lineno": 116,
+ "lineno": 117,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_image[gpt-4o-mini-case0]",
@@ -466,21 +606,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.007238540914840996,
+ "duration": 0.007410250138491392,
"outcome": "passed"
},
"call": {
- "duration": 3.134693874977529,
+ "duration": 2.3748936660122126,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0003104590578004718,
+ "duration": 0.00045658298768103123,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]",
- "lineno": 135,
+ "lineno": 136,
"outcome": "passed",
"keywords": [
"test_chat_streaming_image[gpt-4o-case0]",
@@ -499,21 +639,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.0161851670127362,
+ "duration": 0.023792708991095424,
"outcome": "passed"
},
"call": {
- "duration": 3.0745719589758664,
+ "duration": 3.1502402499318123,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00022620800882577896,
+ "duration": 0.0010152498725801706,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]",
- "lineno": 135,
+ "lineno": 136,
"outcome": "passed",
"keywords": [
"test_chat_streaming_image[gpt-4o-mini-case0]",
@@ -532,21 +672,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.013220708002336323,
+ "duration": 0.01887162495404482,
"outcome": "passed"
},
"call": {
- "duration": 3.624867417034693,
+ "duration": 2.070013999938965,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00020633300300687551,
+ "duration": 0.0005797501653432846,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[gpt-4o-calendar]",
@@ -565,21 +705,21 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.017596833989955485,
+ "duration": 0.017477875109761953,
"outcome": "passed"
},
"call": {
- "duration": 1.248568250099197,
+ "duration": 0.7350135410670191,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0004248750628903508,
+ "duration": 0.00046616699546575546,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[gpt-4o-math]",
@@ -598,21 +738,21 @@
"case_id": "math"
},
"setup": {
- "duration": 0.01512012502644211,
+ "duration": 0.033007249934598804,
"outcome": "passed"
},
"call": {
- "duration": 8.170285542029887,
+ "duration": 5.031138291116804,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00043537491001188755,
+ "duration": 0.00032295798882842064,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]",
@@ -631,21 +771,21 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.010376665974035859,
+ "duration": 0.014672457939013839,
"outcome": "passed"
},
"call": {
- "duration": 0.756480542011559,
+ "duration": 0.7515842081047595,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00025695806834846735,
+ "duration": 0.00034395791590213776,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[gpt-4o-mini-math]",
@@ -664,21 +804,21 @@
"case_id": "math"
},
"setup": {
- "duration": 0.006846625008620322,
+ "duration": 0.02985133300535381,
"outcome": "passed"
},
"call": {
- "duration": 2.6833953330060467,
+ "duration": 2.388004041975364,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00022558309137821198,
+ "duration": 0.00038116704672574997,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[gpt-4o-calendar]",
@@ -697,21 +837,21 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.009646040969528258,
+ "duration": 0.017887332942336798,
"outcome": "passed"
},
"call": {
- "duration": 0.6117532079806551,
+ "duration": 1.0018641669303179,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00015258300118148327,
+ "duration": 0.0005486670415848494,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[gpt-4o-math]",
@@ -730,21 +870,21 @@
"case_id": "math"
},
"setup": {
- "duration": 0.012024458032101393,
+ "duration": 0.0158015841152519,
"outcome": "passed"
},
"call": {
- "duration": 4.522625041077845,
+ "duration": 7.285852208966389,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0004230838967487216,
+ "duration": 0.0003417080733925104,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[gpt-4o-mini-calendar]",
@@ -763,21 +903,21 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.009566582972183824,
+ "duration": 0.014434333890676498,
"outcome": "passed"
},
"call": {
- "duration": 2.5591942919418216,
+ "duration": 0.9268912919797003,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0007555419579148293,
+ "duration": 0.00046200002543628216,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[gpt-4o-mini-math]",
@@ -796,21 +936,21 @@
"case_id": "math"
},
"setup": {
- "duration": 0.010828875005245209,
+ "duration": 0.01635808404535055,
"outcome": "passed"
},
"call": {
- "duration": 2.495122667052783,
+ "duration": 3.7341703751590103,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0002802090020850301,
+ "duration": 0.0004277920816093683,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]",
- "lineno": 204,
+ "lineno": 205,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_tool_calling[gpt-4o-case0]",
@@ -829,21 +969,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.012762792059220374,
+ "duration": 0.021756208036094904,
"outcome": "passed"
},
"call": {
- "duration": 0.5655921660363674,
+ "duration": 0.6105514578521252,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00022304197773337364,
+ "duration": 0.0004747910425066948,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]",
- "lineno": 204,
+ "lineno": 205,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]",
@@ -862,21 +1002,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.03188708401285112,
+ "duration": 0.015522167086601257,
"outcome": "passed"
},
"call": {
- "duration": 0.6159415419679135,
+ "duration": 0.9731334580574185,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0005549580091610551,
+ "duration": 0.0003415420651435852,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-case0]",
- "lineno": 228,
+ "lineno": 229,
"outcome": "passed",
"keywords": [
"test_chat_streaming_tool_calling[gpt-4o-case0]",
@@ -895,21 +1035,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.014768208027817309,
+ "duration": 0.014343583025038242,
"outcome": "passed"
},
"call": {
- "duration": 0.47373537498060614,
+ "duration": 0.5453979168087244,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0005811670562252402,
+ "duration": 0.0011145840398967266,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-mini-case0]",
- "lineno": 228,
+ "lineno": 229,
"outcome": "passed",
"keywords": [
"test_chat_streaming_tool_calling[gpt-4o-mini-case0]",
@@ -928,18 +1068,942 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.010271625011228025,
+ "duration": 0.017669249791651964,
"outcome": "passed"
},
"call": {
- "duration": 0.5656027499353513,
+ "duration": 0.6310562079306692,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0025699170073494315,
+ "duration": 0.0006836249958723783,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[gpt-4o-case0]",
+ "lineno": 257,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_required[gpt-4o-case0]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.016614832915365696,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.6914504591841251,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0004829999525099993,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[gpt-4o-mini-case0]",
+ "lineno": 257,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_required[gpt-4o-mini-case0]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-mini-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o-mini",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.03217837493866682,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.4917086660861969,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0005399580113589764,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[gpt-4o-case0]",
+ "lineno": 281,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_required[gpt-4o-case0]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.01154208299703896,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.5663661658763885,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0008221250027418137,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[gpt-4o-mini-case0]",
+ "lineno": 281,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_required[gpt-4o-mini-case0]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-mini-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o-mini",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.013238833984360099,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.6098562499973923,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00045654200948774815,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[gpt-4o-case0]",
+ "lineno": 308,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_none[gpt-4o-case0]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.014951375080272555,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.5425659997854382,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0002112078946083784,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[gpt-4o-mini-case0]",
+ "lineno": 308,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_none[gpt-4o-mini-case0]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-mini-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o-mini",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.010041083907708526,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.7337456250097603,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00042791711166501045,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[gpt-4o-case0]",
+ "lineno": 331,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_none[gpt-4o-case0]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.007236667210236192,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.4192167909350246,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0010569579899311066,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[gpt-4o-mini-case0]",
+ "lineno": 331,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_none[gpt-4o-mini-case0]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-mini-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o-mini",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.01997062494046986,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.6866283339913934,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0010521251242607832,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.017386124935001135,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 4.425433791941032,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00043645803816616535,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.014067957876250148,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.205255625071004,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0004651669878512621,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.016634040977805853,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.4360020828898996,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0004704580642282963,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.015702415956184268,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 5.882555708056316,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.003662874922156334,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.020038041984662414,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 2.2738899998366833,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0004929169081151485,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-mini-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o-mini",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.007982166949659586,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.7494398748967797,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0005488330498337746,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-mini-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o-mini",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.007455583196133375,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 5.338647875003517,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0005507499445229769,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-mini-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o-mini",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.01675066608004272,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 4.016703582834452,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0005397920031100512,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-mini-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o-mini",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.009890957968309522,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 3.9003724998328835,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0005802921950817108,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-mini-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o-mini",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.021778207970783114,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 2.3824402918107808,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0008852919563651085,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]",
+ "lineno": 450,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.021121500059962273,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 2.362067250069231,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0007184590213000774,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]",
+ "lineno": 450,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.01677604205906391,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.4576394581235945,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0005367500707507133,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]",
+ "lineno": 450,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.010623916983604431,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 3.295967958169058,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0005429999437183142,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]",
+ "lineno": 450,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.014912083046510816,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 2.7422334579750896,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.001017916016280651,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]",
+ "lineno": 450,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.014568000100553036,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 2.4006296249572188,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.000492083141580224,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]",
+ "lineno": 450,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-mini-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o-mini",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.01243741693906486,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.858031083131209,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0012166248634457588,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]",
+ "lineno": 450,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-mini-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o-mini",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.017216125037521124,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.4033057920169085,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00047016702592372894,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]",
+ "lineno": 450,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-mini-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o-mini",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.019779917085543275,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.5427470421418548,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0007832080591470003,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]",
+ "lineno": 450,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-mini-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o-mini",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.019053417025133967,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 4.038398916134611,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00048545910976827145,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]",
+ "lineno": 450,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "gpt-4o-mini-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "gpt-4o-mini",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.01692862482741475,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.849576957989484,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0032055408228188753,
"outcome": "passed"
}
}
],
- "run_timestamp": 1744328848
+ "run_timestamp": 1744679391
}
diff --git a/tests/verifications/test_results/together.json b/tests/verifications/test_results/together.json
index 2b23089e8..44e831936 100644
--- a/tests/verifications/test_results/together.json
+++ b/tests/verifications/test_results/together.json
@@ -1,15 +1,15 @@
{
- "created": 1744328847.853437,
- "duration": 49.9419469833374,
+ "created": 1744679387.346831,
+ "duration": 90.31976795196533,
"exitcode": 1,
"root": "/Users/erichuang/projects/llama-stack",
"environment": {},
"summary": {
- "passed": 22,
- "failed": 12,
+ "passed": 37,
+ "failed": 39,
"skipped": 2,
- "total": 36,
- "collected": 36
+ "total": 78,
+ "collected": 78
},
"collectors": [
{
@@ -29,182 +29,392 @@
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
"type": "Function",
- "lineno": 73
+ "lineno": 74
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
"type": "Function",
- "lineno": 92
+ "lineno": 93
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
"type": "Function",
- "lineno": 116
+ "lineno": 117
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
"type": "Function",
- "lineno": 116
+ "lineno": 117
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
"type": "Function",
- "lineno": 116
+ "lineno": 117
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
"type": "Function",
- "lineno": 135
+ "lineno": 136
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
"type": "Function",
- "lineno": 135
+ "lineno": 136
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
"type": "Function",
- "lineno": 135
+ "lineno": 136
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
"type": "Function",
- "lineno": 159
+ "lineno": 160
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
"type": "Function",
- "lineno": 182
+ "lineno": 183
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
"type": "Function",
- "lineno": 204
+ "lineno": 205
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
"type": "Function",
- "lineno": 204
+ "lineno": 205
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
"type": "Function",
- "lineno": 204
+ "lineno": 205
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
"type": "Function",
- "lineno": 228
+ "lineno": 229
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
"type": "Function",
- "lineno": 228
+ "lineno": 229
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
"type": "Function",
- "lineno": 228
+ "lineno": 229
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
+ "type": "Function",
+ "lineno": 257
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
+ "type": "Function",
+ "lineno": 257
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
+ "type": "Function",
+ "lineno": 257
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
+ "type": "Function",
+ "lineno": 281
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
+ "type": "Function",
+ "lineno": 281
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
+ "type": "Function",
+ "lineno": 281
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
+ "type": "Function",
+ "lineno": 308
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
+ "type": "Function",
+ "lineno": 308
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
+ "type": "Function",
+ "lineno": 308
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
+ "type": "Function",
+ "lineno": 331
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
+ "type": "Function",
+ "lineno": 331
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
+ "type": "Function",
+ "lineno": 331
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 359
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
+ "type": "Function",
+ "lineno": 450
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
+ "type": "Function",
+ "lineno": 450
}
]
}
@@ -212,7 +422,7 @@
"tests": [
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
@@ -231,21 +441,21 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.15774220903404057,
+ "duration": 0.1559112500399351,
"outcome": "passed"
},
"call": {
- "duration": 0.5396400419995189,
+ "duration": 0.3692209171131253,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0002977499971166253,
+ "duration": 0.00021362490952014923,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
@@ -264,21 +474,21 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.015632833004929125,
+ "duration": 0.007326166843995452,
"outcome": "passed"
},
"call": {
- "duration": 0.4675290420418605,
+ "duration": 0.49173945817165077,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00029129208996891975,
+ "duration": 0.00034487503580749035,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
@@ -297,21 +507,21 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.01530187507160008,
+ "duration": 0.021014458034187555,
"outcome": "passed"
},
"call": {
- "duration": 0.501894542016089,
+ "duration": 0.36956487502902746,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0002060839906334877,
+ "duration": 0.0007119579240679741,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
@@ -330,21 +540,21 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.014841833035461605,
+ "duration": 0.011922625126317143,
"outcome": "passed"
},
"call": {
- "duration": 0.4202229160582647,
+ "duration": 2.7763332079630345,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0005559159908443689,
+ "duration": 0.0004842919297516346,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
@@ -363,21 +573,21 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.008204624988138676,
+ "duration": 0.023896750062704086,
"outcome": "passed"
},
"call": {
- "duration": 1.991508833016269,
+ "duration": 0.9817597079090774,
"outcome": "passed"
},
"teardown": {
- "duration": 0.000539042055606842,
+ "duration": 0.0004768748767673969,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
- "lineno": 73,
+ "lineno": 74,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
@@ -396,21 +606,21 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.022528667002916336,
+ "duration": 0.07423937506973743,
"outcome": "passed"
},
"call": {
- "duration": 0.37111237505450845,
+ "duration": 0.3721332079730928,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0005334159359335899,
+ "duration": 0.00020033284090459347,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
@@ -429,21 +639,21 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.00922920904122293,
+ "duration": 0.010166750056669116,
"outcome": "passed"
},
"call": {
- "duration": 1.1684916669037193,
+ "duration": 0.41266337502747774,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0002740409690886736,
+ "duration": 0.00034358282573521137,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
@@ -462,21 +672,21 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.010883333045057952,
+ "duration": 0.016687541967257857,
"outcome": "passed"
},
"call": {
- "duration": 0.4275277080014348,
+ "duration": 0.7235856249462813,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00043112505227327347,
+ "duration": 0.00027179205790162086,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "failed",
"keywords": [
"test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
@@ -495,34 +705,34 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.012945958063937724,
+ "duration": 0.012556416913866997,
"outcome": "passed"
},
"call": {
- "duration": 0.5551295839250088,
+ "duration": 0.27039612480439246,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 110,
+ "lineno": 111,
"message": "IndexError: list index out of range"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 110,
+ "lineno": 111,
"message": "IndexError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:111: IndexError"
},
"teardown": {
- "duration": 0.0002744169905781746,
+ "duration": 0.0002312080468982458,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "failed",
"keywords": [
"test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
@@ -541,34 +751,34 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.017372542060911655,
+ "duration": 0.006413874914869666,
"outcome": "passed"
},
"call": {
- "duration": 0.3579877089941874,
+ "duration": 0.36463545891456306,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 110,
+ "lineno": 111,
"message": "IndexError: list index out of range"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 110,
+ "lineno": 111,
"message": "IndexError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:111: IndexError"
},
"teardown": {
- "duration": 0.0005445419810712337,
+ "duration": 0.00023154192604124546,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "failed",
"keywords": [
"test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
@@ -587,34 +797,34 @@
"case_id": "earth"
},
"setup": {
- "duration": 0.014297832967713475,
+ "duration": 0.015633082948625088,
"outcome": "passed"
},
"call": {
- "duration": 0.8067362919682637,
+ "duration": 0.8896284159272909,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 110,
+ "lineno": 111,
"message": "IndexError: list index out of range"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 110,
+ "lineno": 111,
"message": "IndexError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:111: IndexError"
},
"teardown": {
- "duration": 0.0003220830112695694,
+ "duration": 0.0006587498355656862,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
- "lineno": 92,
+ "lineno": 93,
"outcome": "failed",
"keywords": [
"test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
@@ -633,34 +843,34 @@
"case_id": "saturn"
},
"setup": {
- "duration": 0.008816750021651387,
+ "duration": 0.012669583084061742,
"outcome": "passed"
},
"call": {
- "duration": 0.5383605000097305,
+ "duration": 0.3499396659899503,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 110,
+ "lineno": 111,
"message": "IndexError: list index out of range"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 110,
+ "lineno": 111,
"message": "IndexError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:111: IndexError"
},
"teardown": {
- "duration": 0.00018316600471735,
+ "duration": 0.00024912506341934204,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
- "lineno": 116,
+ "lineno": 117,
"outcome": "skipped",
"keywords": [
"test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
@@ -679,22 +889,22 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.0074389580404385924,
+ "duration": 0.0153201250359416,
"outcome": "passed"
},
"call": {
- "duration": 0.00014933396596461535,
+ "duration": 0.0001901669893413782,
"outcome": "skipped",
- "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 125, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')"
+ "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 126, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')"
},
"teardown": {
- "duration": 0.00012462493032217026,
+ "duration": 0.00012779212556779385,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
- "lineno": 116,
+ "lineno": 117,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
@@ -713,21 +923,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.013580625061877072,
+ "duration": 0.008855124935507774,
"outcome": "passed"
},
"call": {
- "duration": 2.89831429196056,
+ "duration": 1.37906050006859,
"outcome": "passed"
},
"teardown": {
- "duration": 0.000491458922624588,
+ "duration": 0.0004904591478407383,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
- "lineno": 116,
+ "lineno": 117,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
@@ -746,21 +956,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.008266666904091835,
+ "duration": 0.017166708130389452,
"outcome": "passed"
},
"call": {
- "duration": 3.8873212080216035,
+ "duration": 4.003400916932151,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00016850000247359276,
+ "duration": 0.00042724981904029846,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
- "lineno": 135,
+ "lineno": 136,
"outcome": "skipped",
"keywords": [
"test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
@@ -779,22 +989,22 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.0080461660400033,
+ "duration": 0.007232750067487359,
"outcome": "passed"
},
"call": {
- "duration": 0.00014758307952433825,
+ "duration": 0.0001449580304324627,
"outcome": "skipped",
- "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 144, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')"
+ "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 145, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')"
},
"teardown": {
- "duration": 0.00012695800978690386,
+ "duration": 0.0001349160447716713,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
- "lineno": 135,
+ "lineno": 136,
"outcome": "failed",
"keywords": [
"test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
@@ -813,34 +1023,34 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.00845700001809746,
+ "duration": 0.007052165921777487,
"outcome": "passed"
},
"call": {
- "duration": 1.6604419159702957,
+ "duration": 1.4663615000899881,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 153,
+ "lineno": 154,
"message": "IndexError: list index out of range"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 153,
+ "lineno": 154,
"message": "IndexError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:153: IndexError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:154: IndexError"
},
"teardown": {
- "duration": 0.00033458403777331114,
+ "duration": 0.0005696250591427088,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
- "lineno": 135,
+ "lineno": 136,
"outcome": "failed",
"keywords": [
"test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
@@ -859,34 +1069,34 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.012580333976075053,
+ "duration": 0.01214433298446238,
"outcome": "passed"
},
"call": {
- "duration": 4.728511792025529,
+ "duration": 3.902559082955122,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 153,
+ "lineno": 154,
"message": "IndexError: list index out of range"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 153,
+ "lineno": 154,
"message": "IndexError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:153: IndexError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:154: IndexError"
},
"teardown": {
- "duration": 0.00023266696371138096,
+ "duration": 0.000591374933719635,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
@@ -905,21 +1115,21 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.011554082971997559,
+ "duration": 0.01478054211474955,
"outcome": "passed"
},
"call": {
- "duration": 1.3857994999270886,
+ "duration": 0.569845792138949,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0003951250109821558,
+ "duration": 0.00038724998012185097,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
@@ -938,21 +1148,21 @@
"case_id": "math"
},
"setup": {
- "duration": 0.007673708954825997,
+ "duration": 0.014717916958034039,
"outcome": "passed"
},
"call": {
- "duration": 3.082161583006382,
+ "duration": 1.1819656670559198,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0002532500075176358,
+ "duration": 0.0002410421147942543,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
@@ -971,21 +1181,21 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.014791041961871088,
+ "duration": 0.006486707832664251,
"outcome": "passed"
},
"call": {
- "duration": 0.6918012499809265,
+ "duration": 0.5623017910402268,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00027070799842476845,
+ "duration": 0.00032504182308912277,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
@@ -1004,21 +1214,21 @@
"case_id": "math"
},
"setup": {
- "duration": 0.014746625092811882,
+ "duration": 0.009171125013381243,
"outcome": "passed"
},
"call": {
- "duration": 3.5890139170223847,
+ "duration": 2.6005691669415683,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00030137505382299423,
+ "duration": 0.00023995805531740189,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
@@ -1037,21 +1247,21 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.036798374960199,
+ "duration": 0.009700333932414651,
"outcome": "passed"
},
"call": {
- "duration": 0.6914895409718156,
+ "duration": 0.4192442081402987,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00023716699797660112,
+ "duration": 0.00040241610258817673,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
- "lineno": 159,
+ "lineno": 160,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
@@ -1070,21 +1280,21 @@
"case_id": "math"
},
"setup": {
- "duration": 0.05965254199691117,
+ "duration": 0.006938542006537318,
"outcome": "passed"
},
"call": {
- "duration": 2.609581291093491,
+ "duration": 2.1736337919719517,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0002674580318853259,
+ "duration": 0.00019279099069535732,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
@@ -1103,21 +1313,21 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.014533916022628546,
+ "duration": 0.008775749942287803,
"outcome": "passed"
},
"call": {
- "duration": 0.6227063750848174,
+ "duration": 0.5588400410488248,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00019699998665601015,
+ "duration": 0.00040091690607368946,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
@@ -1136,21 +1346,21 @@
"case_id": "math"
},
"setup": {
- "duration": 0.009818125050514936,
+ "duration": 0.01844154205173254,
"outcome": "passed"
},
"call": {
- "duration": 5.144610875053331,
+ "duration": 2.205772665794939,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00045220903120934963,
+ "duration": 0.00021091708913445473,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "failed",
"keywords": [
"test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
@@ -1169,34 +1379,34 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.012392290984280407,
+ "duration": 0.015595750184729695,
"outcome": "passed"
},
"call": {
- "duration": 0.777625665999949,
+ "duration": 0.6904467919375747,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 201,
+ "lineno": 202,
"message": "IndexError: list index out of range"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 201,
+ "lineno": 202,
"message": "IndexError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:202: IndexError"
},
"teardown": {
- "duration": 0.000559916952624917,
+ "duration": 0.0002907498273998499,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "failed",
"keywords": [
"test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
@@ -1215,34 +1425,34 @@
"case_id": "math"
},
"setup": {
- "duration": 0.010390624986030161,
+ "duration": 0.008272957988083363,
"outcome": "passed"
},
"call": {
- "duration": 2.680094916955568,
+ "duration": 3.499622541014105,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 201,
+ "lineno": 202,
"message": "IndexError: list index out of range"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 201,
+ "lineno": 202,
"message": "IndexError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:202: IndexError"
},
"teardown": {
- "duration": 0.00041987502481788397,
+ "duration": 0.0005947079043835402,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "failed",
"keywords": [
"test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
@@ -1261,34 +1471,34 @@
"case_id": "calendar"
},
"setup": {
- "duration": 0.01190529193263501,
+ "duration": 0.013340875040739775,
"outcome": "passed"
},
"call": {
- "duration": 0.6690819580107927,
+ "duration": 0.42789591709151864,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 201,
+ "lineno": 202,
"message": "IndexError: list index out of range"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 201,
+ "lineno": 202,
"message": "IndexError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:202: IndexError"
},
"teardown": {
- "duration": 0.000247166957706213,
+ "duration": 0.0003039578441530466,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
- "lineno": 182,
+ "lineno": 183,
"outcome": "failed",
"keywords": [
"test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
@@ -1307,34 +1517,34 @@
"case_id": "math"
},
"setup": {
- "duration": 0.009588208980858326,
+ "duration": 0.01058275019749999,
"outcome": "passed"
},
"call": {
- "duration": 2.4867218340514228,
+ "duration": 5.795635707909241,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 201,
+ "lineno": 202,
"message": "IndexError: list index out of range"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 201,
+ "lineno": 202,
"message": "IndexError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:202: IndexError"
},
"teardown": {
- "duration": 0.00022487505339086056,
+ "duration": 0.0005178749561309814,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
- "lineno": 204,
+ "lineno": 205,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
@@ -1353,21 +1563,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.008509417064487934,
+ "duration": 0.014336749911308289,
"outcome": "passed"
},
"call": {
- "duration": 0.45511841599363834,
+ "duration": 0.451304541900754,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00031033402774482965,
+ "duration": 0.0004718329291790724,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
- "lineno": 204,
+ "lineno": 205,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
@@ -1386,21 +1596,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.01352791697718203,
+ "duration": 0.01625004201196134,
"outcome": "passed"
},
"call": {
- "duration": 0.7166531670372933,
+ "duration": 0.5111537908669561,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00031470798421651125,
+ "duration": 0.00046774977818131447,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
- "lineno": 204,
+ "lineno": 205,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
@@ -1419,21 +1629,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.01369225000962615,
+ "duration": 0.015832332894206047,
"outcome": "passed"
},
"call": {
- "duration": 0.34134254103992134,
+ "duration": 0.8238586660008878,
"outcome": "passed"
},
"teardown": {
- "duration": 0.0002922919811680913,
+ "duration": 0.0006185418460518122,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
- "lineno": 228,
+ "lineno": 229,
"outcome": "passed",
"keywords": [
"test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
@@ -1452,21 +1662,21 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.025748749962076545,
+ "duration": 0.007832166040316224,
"outcome": "passed"
},
"call": {
- "duration": 0.7462511250050738,
+ "duration": 0.685583250131458,
"outcome": "passed"
},
"teardown": {
- "duration": 0.00030449999030679464,
+ "duration": 0.0004414590075612068,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
- "lineno": 228,
+ "lineno": 229,
"outcome": "failed",
"keywords": [
"test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
@@ -1485,34 +1695,39 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.015131957945413888,
+ "duration": 0.021764083998277783,
"outcome": "passed"
},
"call": {
- "duration": 0.4556894999695942,
+ "duration": 0.35617320891469717,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 251,
+ "lineno": 587,
"message": "IndexError: list index out of range"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 251,
+ "lineno": 247,
+ "message": ""
+ },
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
"message": "IndexError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:251: IndexError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:247: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError"
},
"teardown": {
- "duration": 0.000539042055606842,
+ "duration": 0.0005425831768661737,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
- "lineno": 228,
+ "lineno": 229,
"outcome": "failed",
"keywords": [
"test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
@@ -1531,31 +1746,1833 @@
"case_id": "case0"
},
"setup": {
- "duration": 0.016429082956165075,
+ "duration": 0.016708041075617075,
"outcome": "passed"
},
"call": {
- "duration": 0.3677835420239717,
+ "duration": 0.49443637509830296,
"outcome": "failed",
"crash": {
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 251,
+ "lineno": 587,
"message": "IndexError: list index out of range"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
- "lineno": 251,
+ "lineno": 247,
+ "message": ""
+ },
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
"message": "IndexError"
}
],
- "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:251: IndexError"
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:247: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError"
},
"teardown": {
- "duration": 0.001610000035725534,
+ "duration": 0.0002642078325152397,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
+ "lineno": 257,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.009570583933964372,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.5232214999850839,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0006591668352484703,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
+ "lineno": 257,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.01567283389158547,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.4465816249139607,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0003922500181943178,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
+ "lineno": 257,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.021711332956328988,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.5361095829866827,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0003099590539932251,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
+ "lineno": 281,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.009334125090390444,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.5789772500284016,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00037712487392127514,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
+ "lineno": 281,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.019614499993622303,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.444399792002514,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError: list index out of range"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 300,
+ "message": ""
+ },
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:300: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError"
+ },
+ "teardown": {
+ "duration": 0.0004192921333014965,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
+ "lineno": 281,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.012822834076359868,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.6777042911853641,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError: list index out of range"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 300,
+ "message": ""
+ },
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:300: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError"
+ },
+ "teardown": {
+ "duration": 0.0004483328666538,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
+ "lineno": 308,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.011924332939088345,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.4756374170538038,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 328,
+ "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\n + where [ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=13421903014786785000).message"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 328,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE AssertionError: Expected no tool calls when tool_choice='none'\nE assert [ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\nE + where [ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=13421903014786785000).message\n\ntests/verifications/openai_api/test_chat_completion.py:328: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0004585420247167349,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
+ "lineno": 308,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.013246082933619618,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.5618870409671217,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 328,
+ "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)] is None\n + where [ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)]).tool_calls\n + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)]), seed=None).message"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 328,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE AssertionError: Expected no tool calls when tool_choice='none'\nE assert [ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)] is None\nE + where [ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)]), seed=None).message\n\ntests/verifications/openai_api/test_chat_completion.py:328: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00025883293710649014,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
+ "lineno": 308,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.008055417099967599,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.32869591703638434,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 328,
+ "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\n + where [ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\n + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 328,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE AssertionError: Expected no tool calls when tool_choice='none'\nE assert [ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\nE + where [ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message\n\ntests/verifications/openai_api/test_chat_completion.py:328: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0003937501460313797,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
+ "lineno": 331,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.013460749993100762,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.35879983310587704,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 355,
+ "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_q472clmnii99ps1fxqtv8qvr', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n + where [ChoiceDeltaToolCall(index=0, id='call_q472clmnii99ps1fxqtv8qvr', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_q472clmnii99ps1fxqtv8qvr', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 355,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=True,\n )\n \n content = \"\"\n for chunk in stream:\n delta = chunk.choices[0].delta\n if delta.content:\n content += delta.content\n> assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE AssertionError: Expected no tool call chunks when tool_choice='none'\nE assert not [ChoiceDeltaToolCall(index=0, id='call_q472clmnii99ps1fxqtv8qvr', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE + where [ChoiceDeltaToolCall(index=0, id='call_q472clmnii99ps1fxqtv8qvr', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_q472clmnii99ps1fxqtv8qvr', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:355: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0002649170346558094,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
+ "lineno": 331,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.0068365419283509254,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.5351063329726458,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 355,
+ "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_l3roc57o2pn9b70f0dcgil53', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n + where [ChoiceDeltaToolCall(index=0, id='call_l3roc57o2pn9b70f0dcgil53', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_l3roc57o2pn9b70f0dcgil53', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 355,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=True,\n )\n \n content = \"\"\n for chunk in stream:\n delta = chunk.choices[0].delta\n if delta.content:\n content += delta.content\n> assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE AssertionError: Expected no tool call chunks when tool_choice='none'\nE assert not [ChoiceDeltaToolCall(index=0, id='call_l3roc57o2pn9b70f0dcgil53', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE + where [ChoiceDeltaToolCall(index=0, id='call_l3roc57o2pn9b70f0dcgil53', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_l3roc57o2pn9b70f0dcgil53', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:355: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0004712918307632208,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
+ "lineno": 331,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ "case_id": "case0"
+ },
+ "setup": {
+ "duration": 0.014073874801397324,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.6729549579322338,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 355,
+ "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_ktw831i0p838mzvnnaylf6fp', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n + where [ChoiceDeltaToolCall(index=0, id='call_ktw831i0p838mzvnnaylf6fp', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_ktw831i0p838mzvnnaylf6fp', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 355,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=True,\n )\n \n content = \"\"\n for chunk in stream:\n delta = chunk.choices[0].delta\n if delta.content:\n content += delta.content\n> assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE AssertionError: Expected no tool call chunks when tool_choice='none'\nE assert not [ChoiceDeltaToolCall(index=0, id='call_ktw831i0p838mzvnnaylf6fp', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE + where [ChoiceDeltaToolCall(index=0, id='call_ktw831i0p838mzvnnaylf6fp', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_ktw831i0p838mzvnnaylf6fp', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:355: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.000251916004344821,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.009340125136077404,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.3328715830575675,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 0 tool calls, but got 1\nassert 1 == 0\n + where 1 = len(([ChatCompletionMessageToolCall(id='call_3rr948zuvun0533y4oyyep0z', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]))\n + where [ChatCompletionMessageToolCall(id='call_3rr948zuvun0533y4oyyep0z', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_3rr948zuvun0533y4oyyep0z', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+      "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\"\n        Test cases for multi-turn tool calling.\n        Tool calls are asserted.\n        Tool responses are provided in the test case.\n        Final response is asserted.\n        \"\"\"\n    \n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        # Create a copy of the messages list to avoid modifying the original\n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        # Use deepcopy to prevent modification across runs/parametrization\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        # keep going until either\n        # 1. we have messages to test in multi-turn\n        # 2. no messages but last message is tool response\n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            # do not take new messages if last message is tool response\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                # Ensure new_messages is a list of message objects\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    # If it's a single message object, add it directly\n                    messages.append(new_messages)\n    \n            # --- API Call ---\n            response = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=False,\n            )\n    \n            # --- Process Response ---\n            assistant_message = response.choices[0].message\n            messages.append(assistant_message.model_dump(exclude_unset=True))\n    \n            assert assistant_message.role == \"assistant\"\n    \n            # Get the expected result data\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            # --- Assertions based on expected result ---\n>           assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n            )\nE           AssertionError: Expected 0 tool calls, but got 1\nE           assert 1 == 0\nE            +  where 1 = len(([ChatCompletionMessageToolCall(id='call_3rr948zuvun0533y4oyyep0z', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]))\nE            +    where [ChatCompletionMessageToolCall(id='call_3rr948zuvun0533y4oyyep0z', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_3rr948zuvun0533y4oyyep0z', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00042020808905363083,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.01490145898424089,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.8346118750050664,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00034404080361127853,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.014493625145405531,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.8973606249783188,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00021345820277929306,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.009358166949823499,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 4.5295154170598835,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0002461671829223633,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.009552374947816133,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.34176899981684983,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 429,
+ "message": "AssertionError: Expected arguments '{'month': 1, 'year': 2025}', got '{'month': '1', 'year': '2025'}'\nassert {'month': '1', 'year': '2025'} == {'month': 1, 'year': 2025}\n \n Differing items:\n {'month': '1'} != {'month': 1}\n {'year': '2025'} != {'year': 2025}\n \n Full diff:\n {...\n \n ...Full output truncated (7 lines hidden), use '-vv' to show"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 429,
+ "message": "AssertionError"
+ }
+ ],
+      "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\"\n        Test cases for multi-turn tool calling.\n        Tool calls are asserted.\n        Tool responses are provided in the test case.\n        Final response is asserted.\n        \"\"\"\n    \n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        # Create a copy of the messages list to avoid modifying the original\n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        # Use deepcopy to prevent modification across runs/parametrization\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        # keep going until either\n        # 1. we have messages to test in multi-turn\n        # 2. no messages but last message is tool response\n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            # do not take new messages if last message is tool response\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                # Ensure new_messages is a list of message objects\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    # If it's a single message object, add it directly\n                    messages.append(new_messages)\n    \n            # --- API Call ---\n            response = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=False,\n            )\n    \n            # --- Process Response ---\n            assistant_message = response.choices[0].message\n            messages.append(assistant_message.model_dump(exclude_unset=True))\n    \n            assert assistant_message.role == \"assistant\"\n    \n            # Get the expected result data\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            # --- Assertions based on expected result ---\n            assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n            )\n    \n            if num_tool_calls > 0:\n                tool_call = assistant_message.tool_calls[0]\n                assert tool_call.function.name == expected[\"tool_name\"], (\n                    f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n                )\n                # Parse the JSON string arguments before comparing\n                actual_arguments = json.loads(tool_call.function.arguments)\n>               assert actual_arguments == expected[\"tool_arguments\"], (\n                    f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n                )\nE               AssertionError: Expected arguments '{'month': 1, 'year': 2025}', got '{'month': '1', 'year': '2025'}'\nE               assert {'month': '1', 'year': '2025'} == {'month': 1, 'year': 2025}\nE                 \nE                 Differing items:\nE                 {'month': '1'} != {'month': 1}\nE                 {'year': '2025'} != {'year': 2025}\nE                 \nE                 Full diff:\nE                   {...\nE                 \nE                 ...Full output truncated (7 lines hidden), use '-vv' to show\n\ntests/verifications/openai_api/test_chat_completion.py:429: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.000527665950357914,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.012501416960731149,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.585734374821186,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError: Expected 0 tool calls, but got 2\nassert 2 == 0\n + where 2 = len(([ChatCompletionMessageToolCall(id='call_4fm3kj059swz9no94n6fg54d', function=Function(arguments='{\"location\":\"Sun, NA\"}', name='get_weather'), type='function', index=0), ChatCompletionMessageToolCall(id='call_lzc5lo7y2p7wjyquvmvvzt64', function=Function(arguments='{\"name\":\"Sun\"}', name='get_latin_name'), type='function', index=1)]))\n + where [ChatCompletionMessageToolCall(id='call_4fm3kj059swz9no94n6fg54d', function=Function(arguments='{\"location\":\"Sun, NA\"}', name='get_weather'), type='function', index=0), ChatCompletionMessageToolCall(id='call_lzc5lo7y2p7wjyquvmvvzt64', function=Function(arguments='{\"name\":\"Sun\"}', name='get_latin_name'), type='function', index=1)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_4fm3kj059swz9no94n6fg54d', function=Function(arguments='{\"location\":\"Sun, NA\"}', name='get_weather'), type='function', index=0), ChatCompletionMessageToolCall(id='call_lzc5lo7y2p7wjyquvmvvzt64', function=Function(arguments='{\"name\":\"Sun\"}', name='get_latin_name'), type='function', index=1)]).tool_calls"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 418,
+ "message": "AssertionError"
+ }
+ ],
+      "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\"\n        Test cases for multi-turn tool calling.\n        Tool calls are asserted.\n        Tool responses are provided in the test case.\n        Final response is asserted.\n        \"\"\"\n    \n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        # Create a copy of the messages list to avoid modifying the original\n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        # Use deepcopy to prevent modification across runs/parametrization\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        # keep going until either\n        # 1. we have messages to test in multi-turn\n        # 2. no messages but last message is tool response\n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            # do not take new messages if last message is tool response\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                # Ensure new_messages is a list of message objects\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    # If it's a single message object, add it directly\n                    messages.append(new_messages)\n    \n            # --- API Call ---\n            response = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=False,\n            )\n    \n            # --- Process Response ---\n            assistant_message = response.choices[0].message\n            messages.append(assistant_message.model_dump(exclude_unset=True))\n    \n            assert assistant_message.role == \"assistant\"\n    \n            # Get the expected result data\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            # --- Assertions based on expected result ---\n>           assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n            )\nE           AssertionError: Expected 0 tool calls, but got 2\nE           assert 2 == 0\nE            +  where 2 = len(([ChatCompletionMessageToolCall(id='call_4fm3kj059swz9no94n6fg54d', function=Function(arguments='{\"location\":\"Sun, NA\"}', name='get_weather'), type='function', index=0), ChatCompletionMessageToolCall(id='call_lzc5lo7y2p7wjyquvmvvzt64', function=Function(arguments='{\"name\":\"Sun\"}', name='get_latin_name'), type='function', index=1)]))\nE            +    where [ChatCompletionMessageToolCall(id='call_4fm3kj059swz9no94n6fg54d', function=Function(arguments='{\"location\":\"Sun, NA\"}', name='get_weather'), type='function', index=0), ChatCompletionMessageToolCall(id='call_lzc5lo7y2p7wjyquvmvvzt64', function=Function(arguments='{\"name\":\"Sun\"}', name='get_latin_name'), type='function', index=1)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_4fm3kj059swz9no94n6fg54d', function=Function(arguments='{\"location\":\"Sun, NA\"}', name='get_weather'), type='function', index=0), ChatCompletionMessageToolCall(id='call_lzc5lo7y2p7wjyquvmvvzt64', function=Function(arguments='{\"name\":\"Sun\"}', name='get_latin_name'), type='function', index=1)]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0003941669128835201,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.014057958032935858,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.7121559998486191,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00048266700468957424,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.02072141715325415,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.0424797078594565,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0004878339823335409,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.018570583080872893,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 3.4340267919469625,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00023016706109046936,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.009570334106683731,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 2.2068665840197355,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00051837507635355,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.01873366697691381,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.5193468749057502,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 446,
+ "message": "AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": null, \"parameters\": null}'\nassert False\n + where False = any(. at 0x10e4c0f90>)"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 446,
+ "message": "AssertionError"
+ }
+ ],
+      "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\"\n        Test cases for multi-turn tool calling.\n        Tool calls are asserted.\n        Tool responses are provided in the test case.\n        Final response is asserted.\n        \"\"\"\n    \n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        # Create a copy of the messages list to avoid modifying the original\n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        # Use deepcopy to prevent modification across runs/parametrization\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        # keep going until either\n        # 1. we have messages to test in multi-turn\n        # 2. no messages but last message is tool response\n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            # do not take new messages if last message is tool response\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                # Ensure new_messages is a list of message objects\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    # If it's a single message object, add it directly\n                    messages.append(new_messages)\n    \n            # --- API Call ---\n            response = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=False,\n            )\n    \n            # --- Process Response ---\n            assistant_message = response.choices[0].message\n            messages.append(assistant_message.model_dump(exclude_unset=True))\n    \n            assert assistant_message.role == \"assistant\"\n    \n            # Get the expected result data\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            # --- Assertions based on expected result ---\n            assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n            )\n    \n            if num_tool_calls > 0:\n                tool_call = assistant_message.tool_calls[0]\n                assert tool_call.function.name == expected[\"tool_name\"], (\n                    f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n                )\n                # Parse the JSON string arguments before comparing\n                actual_arguments = json.loads(tool_call.function.arguments)\n                assert actual_arguments == expected[\"tool_arguments\"], (\n                    f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n                )\n    \n                # Prepare and append the tool response for the next turn\n                tool_response = tool_responses.pop(0)\n                messages.append(\n                    {\n                        \"role\": \"tool\",\n                        \"tool_call_id\": tool_call.id,\n                        \"content\": tool_response[\"response\"],\n                    }\n                )\n            else:\n                assert assistant_message.content is not None, \"Expected content, but none received.\"\n                expected_answers = expected[\"answer\"]  # This is now a list\n                content_lower = assistant_message.content.lower()\n>               assert any(ans.lower() in content_lower for ans in expected_answers), (\n                    f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n                )\nE               AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": null, \"parameters\": null}'\nE               assert False\nE                +  where False = any(. at 0x10e4c0f90>)\n\ntests/verifications/openai_api/test_chat_completion.py:446: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0004933748859912157,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.014272749889642,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.911199334077537,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00043049990199506283,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.031040542060509324,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 3.0026419160421938,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00045104208402335644,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
+ "lineno": 359,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.016529500018805265,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 2.7563346249517053,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 429,
+ "message": "AssertionError: Expected arguments '{'name': 'Team Building', 'date': '2025-03-03', 'time': '10:00', 'location': 'Main Conference Room', 'participants': ['Alice', 'Bob', 'Charlie']}', got '{'participants': '[\"Alice\", \"Bob\", \"Charlie\"]', 'location': 'Main Conference Room', 'name': 'Team Building', 'date': '2025-03-03', 'time': '10:00'}'\nassert {'date': '202...arlie\"]', ...} == {'date': '202...harlie'], ...}\n \n Omitting 4 identical items, use -vv to show\n Differing items:\n {'participants': '[\"Alice\", \"Bob\", \"Charlie\"]'} != {'participants': ['Alice', 'Bob', 'Charlie']}\n \n Full diff:\n {...\n \n ...Full output truncated (11 lines hidden), use '-vv' to show"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 429,
+ "message": "AssertionError"
+ }
+ ],
+      "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\"\n        Test cases for multi-turn tool calling.\n        Tool calls are asserted.\n        Tool responses are provided in the test case.\n        Final response is asserted.\n        \"\"\"\n    \n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        # Create a copy of the messages list to avoid modifying the original\n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        # Use deepcopy to prevent modification across runs/parametrization\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        # keep going until either\n        # 1. we have messages to test in multi-turn\n        # 2. no messages but last message is tool response\n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            # do not take new messages if last message is tool response\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                # Ensure new_messages is a list of message objects\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    # If it's a single message object, add it directly\n                    messages.append(new_messages)\n    \n            # --- API Call ---\n            response = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=False,\n            )\n    \n            # --- Process Response ---\n            assistant_message = response.choices[0].message\n            messages.append(assistant_message.model_dump(exclude_unset=True))\n    \n            assert assistant_message.role == \"assistant\"\n    \n            # Get the expected result data\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            # --- Assertions based on expected result ---\n            assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n            )\n    \n            if num_tool_calls > 0:\n                tool_call = assistant_message.tool_calls[0]\n                assert tool_call.function.name == expected[\"tool_name\"], (\n                    f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n                )\n                # Parse the JSON string arguments before comparing\n                actual_arguments = json.loads(tool_call.function.arguments)\n>               assert actual_arguments == expected[\"tool_arguments\"], (\n                    f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n                )\nE               AssertionError: Expected arguments '{'name': 'Team Building', 'date': '2025-03-03', 'time': '10:00', 'location': 'Main Conference Room', 'participants': ['Alice', 'Bob', 'Charlie']}', got '{'participants': '[\"Alice\", \"Bob\", \"Charlie\"]', 'location': 'Main Conference Room', 'name': 'Team Building', 'date': '2025-03-03', 'time': '10:00'}'\nE               assert {'date': '202...arlie\"]', ...} == {'date': '202...harlie'], ...}\nE                 \nE                 Omitting 4 identical items, use -vv to show\nE                 Differing items:\nE                 {'participants': '[\"Alice\", \"Bob\", \"Charlie\"]'} != {'participants': ['Alice', 'Bob', 'Charlie']}\nE                 \nE                 Full diff:\nE                   {...\nE                 \nE                 ...Full output truncated (11 lines hidden), use '-vv' to show\n\ntests/verifications/openai_api/test_chat_completion.py:429: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0005542081780731678,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
+ "lineno": 359,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.013607957866042852,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 3.0105869588442147,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.0004793750122189522,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.01806124998256564,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.3295827910769731,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError: Expected 0 tool calls, but got 1\nassert 1 == 0\n + where 1 = len(([{'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'get_weather'}, 'id': 'call_l066e8oey2i8exeodczlv1mh', 'type': 'function'}]))"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 500,
+ "message": "AssertionError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 0 tool calls, but got 1\nE assert 1 == 0\nE + where 1 = len(([{'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'get_weather'}, 'id': 'call_l066e8oey2i8exeodczlv1mh', 'type': 'function'}]))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.0002942080609500408,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.007637625094503164,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 2.021851292112842,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 526,
+ "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 526,
+ "message": "AssertionError"
+ }
+ ],
+          "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n> assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE AssertionError: Expected content, but none received.\nE assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:526: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00036791712045669556,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
+ "lineno": 450,
+ "outcome": "passed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.013031583046540618,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.8596610419917852,
+ "outcome": "passed"
+ },
+ "teardown": {
+ "duration": 0.00042829103767871857,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.015244666952639818,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.0227877080906183,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 526,
+ "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 526,
+ "message": "AssertionError"
+ }
+ ],
+          "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n> assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE AssertionError: Expected content, but none received.\nE assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:526: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00024933391250669956,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.008626125054433942,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.3212552920449525,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 512,
+ "message": "AssertionError: Expected arguments '{'month': 1, 'year': 2025}', got '{'month': '1', 'year': '2025'}'\nassert {'month': '1', 'year': '2025'} == {'month': 1, 'year': 2025}\n \n Differing items:\n {'month': '1'} != {'month': 1}\n {'year': '2025'} != {'year': 2025}\n \n Full diff:\n {...\n \n ...Full output truncated (7 lines hidden), use '-vv' to show"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 512,
+ "message": "AssertionError"
+ }
+ ],
+          "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n> assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\nE AssertionError: Expected arguments '{'month': 1, 'year': 2025}', got '{'month': '1', 'year': '2025'}'\nE assert {'month': '1', 'year': '2025'} == {'month': 1, 'year': 2025}\nE \nE Differing items:\nE {'month': '1'} != {'month': 1}\nE {'year': '2025'} != {'year': 2025}\nE \nE Full diff:\nE {...\nE \nE ...Full output truncated (7 lines hidden), use '-vv' to show\n\ntests/verifications/openai_api/test_chat_completion.py:512: AssertionError"
+ },
+ "teardown": {
+ "duration": 0.00020562508143484592,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.007338125025853515,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.4175920831039548,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError: list index out of range"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 485,
+ "message": ""
+ },
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError"
+ },
+ "teardown": {
+ "duration": 0.00023462506942451,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.007788832997903228,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.45610866602510214,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError: list index out of range"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 485,
+ "message": ""
+ },
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError"
+ },
+ "teardown": {
+ "duration": 0.00021450011990964413,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.006751166889443994,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.7053082089405507,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError: list index out of range"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 485,
+ "message": ""
+ },
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError"
+ },
+ "teardown": {
+ "duration": 0.00021783309057354927,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.008729791967198253,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.5665898330044001,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError: list index out of range"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 485,
+ "message": ""
+ },
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError"
+ },
+ "teardown": {
+ "duration": 0.0002288338728249073,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.009526000125333667,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.1714977910742164,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError: list index out of range"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 485,
+ "message": ""
+ },
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError"
+ },
+ "teardown": {
+ "duration": 0.00032483390532433987,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ "case_id": "text_then_weather_tool"
+ },
+ "setup": {
+ "duration": 0.010107750073075294,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.26202141703106463,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError: list index out of range"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 485,
+ "message": ""
+ },
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError"
+ },
+ "teardown": {
+ "duration": 0.00022558285854756832,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ "case_id": "weather_tool_then_text"
+ },
+ "setup": {
+ "duration": 0.008256082888692617,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.3466235001105815,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError: list index out of range"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 485,
+ "message": ""
+ },
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError"
+ },
+ "teardown": {
+ "duration": 0.000535458093509078,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ "case_id": "add_product_tool"
+ },
+ "setup": {
+ "duration": 0.0180504999589175,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 1.8803812500555068,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError: list index out of range"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 485,
+ "message": ""
+ },
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError"
+ },
+ "teardown": {
+ "duration": 0.00025062495842576027,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ "case_id": "get_then_create_event_tool"
+ },
+ "setup": {
+ "duration": 0.00993091706186533,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.5258524999953806,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError: list index out of range"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 485,
+ "message": ""
+ },
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError"
+ },
+ "teardown": {
+ "duration": 0.0002823749091476202,
+ "outcome": "passed"
+ }
+ },
+ {
+ "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
+ "lineno": 450,
+ "outcome": "failed",
+ "keywords": [
+ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
+ "parametrize",
+ "pytestmark",
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool",
+ "test_chat_completion.py",
+ "openai_api",
+ "verifications",
+ "tests",
+ "llama-stack",
+ ""
+ ],
+ "metadata": {
+ "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ "case_id": "compare_monthly_expense_tool"
+ },
+ "setup": {
+ "duration": 0.047535917023196816,
+ "outcome": "passed"
+ },
+ "call": {
+ "duration": 0.4426498331595212,
+ "outcome": "failed",
+ "crash": {
+ "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError: list index out of range"
+ },
+ "traceback": [
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 485,
+ "message": ""
+ },
+ {
+ "path": "tests/verifications/openai_api/test_chat_completion.py",
+ "lineno": 587,
+ "message": "IndexError"
+ }
+ ],
+ "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError"
+ },
+ "teardown": {
+ "duration": 0.0010368749499320984,
"outcome": "passed"
}
}
],
- "run_timestamp": 1744328795
+ "run_timestamp": 1744679294
}