Merge branch 'main' into fix/issue-4005-installer-macos-permissions

Roy Belio authored 2025-11-06 20:31:28 +02:00, committed by GitHub
commit 03d85ae04a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
599 changed files with 9777 additions and 370163 deletions


@@ -72,7 +72,8 @@ runs:
echo "New recordings detected, committing and pushing"
git add tests/integration/
git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"
git commit -m "Recordings update from CI (setup: ${{ inputs.setup }}, suite: ${{ inputs.suite }})"
git fetch origin ${{ github.ref_name }}
git rebase origin/${{ github.ref_name }}
echo "Rebased successfully"
@@ -88,6 +89,8 @@ runs:
run: |
# Ollama logs (if ollama container exists)
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
# vllm logs (if vllm container exists)
sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
# Note: distro container logs are now dumped in integration-tests.sh before container is removed
- name: Upload logs


@@ -11,13 +11,14 @@ runs:
--name vllm \
-p 8000:8000 \
--privileged=true \
quay.io/higginsd/vllm-cpu:65393ee064 \
quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \
--host 0.0.0.0 \
--port 8000 \
--enable-auto-tool-choice \
--tool-call-parser llama3_json \
--model /root/.cache/Llama-3.2-1B-Instruct \
--served-model-name meta-llama/Llama-3.2-1B-Instruct
--tool-call-parser hermes \
--model /root/.cache/Qwen3-0.6B \
--served-model-name Qwen/Qwen3-0.6B \
--max-model-len 8192
# Wait for vllm to be ready
echo "Waiting for vllm to be ready..."


@@ -23,10 +23,10 @@ on:
- '.github/actions/setup-test-environment/action.yml'
- '.github/actions/run-and-record-tests/action.yml'
- 'scripts/integration-tests.sh'
- 'scripts/generate_ci_matrix.py'
schedule:
# If changing the cron schedule, update the provider in the test-matrix job
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
- cron: '1 0 * * 0' # (test vllm) Weekly on Sunday at 00:01 UTC
workflow_dispatch:
inputs:
test-all-client-versions:
@@ -44,8 +44,27 @@ concurrency:
cancel-in-progress: true
jobs:
generate-matrix:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- name: Checkout repository
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Generate test matrix
id: set-matrix
run: |
# Generate matrix from CI_MATRIX in tests/integration/suites.py
# Supports schedule-based and manual input overrides
MATRIX=$(PYTHONPATH=. python3 scripts/generate_ci_matrix.py \
--schedule "${{ github.event.schedule }}" \
--test-setup "${{ github.event.inputs.test-setup }}")
echo "matrix=$MATRIX" >> $GITHUB_OUTPUT
echo "Generated matrix: $MATRIX"
run-replay-mode-tests:
needs: generate-matrix
runs-on: ubuntu-latest
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
@@ -56,18 +75,9 @@ jobs:
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
# Define (setup, suite) pairs - they are always matched and cannot be independent
# Weekly schedule (Sun 1 AM): vllm+base
# Input test-setup=ollama-vision: ollama-vision+vision
# Default (including test-setup=ollama): ollama+base, ollama-vision+vision, gpt+responses
config: >-
${{
github.event.schedule == '1 0 * * 0'
&& fromJSON('[{"setup": "vllm", "suite": "base"}]')
|| github.event.inputs.test-setup == 'ollama-vision'
&& fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
|| fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}, {"setup": "gpt", "suite": "responses"}]')
}}
# Test configurations: Generated from CI_MATRIX in tests/integration/suites.py
# See scripts/generate_ci_matrix.py for generation logic
config: ${{ fromJSON(needs.generate-matrix.outputs.matrix).include }}
steps:
- name: Checkout repository


@@ -165,3 +165,14 @@ jobs:
echo "::error::Full mypy failed. Reproduce locally with 'uv run pre-commit run mypy-full --hook-stage manual --all-files'."
fi
exit $status
- name: Check if any unused recordings
run: |
set -e
PYTHONPATH=$PWD uv run ./scripts/cleanup_recordings.py --delete
changes=$(git status --short tests/integration | grep 'recordings' || true)
if [ -n "$changes" ]; then
echo "::error::Unused integration recordings detected. Run 'PYTHONPATH=$(pwd) uv run ./scripts/cleanup_recordings.py --delete' locally and commit the deletions."
echo "$changes"
exit 1
fi


@@ -9976,6 +9976,70 @@ components:
- metadata
title: VectorStoreObject
description: OpenAI Vector Store object.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
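For reference, a client payload matching the schemas above; only the body shape comes from the spec, the store name is illustrative:

```python
# Static chunking: both fields are required by VectorStoreChunkingStrategyStaticConfig.
create_body = {
    "name": "my-store",  # illustrative
    "chunking_strategy": {
        "type": "static",
        "static": {
            "max_chunk_size_tokens": 800,  # must be 100..4096 per the schema
            "chunk_overlap_tokens": 400,
        },
    },
}

# Automatic chunking: the discriminator alone is enough.
auto_body = {"name": "my-store", "chunking_strategy": {"type": "auto"}}
```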
"OpenAICreateVectorStoreRequestWithExtraBody":
type: object
properties:
@@ -10001,15 +10065,7 @@ components:
description: >-
(Optional) Expiration policy for the vector store
chunking_strategy:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
$ref: '#/components/schemas/VectorStoreChunkingStrategy'
description: >-
(Optional) Strategy for splitting files into chunks
metadata:
@@ -10085,70 +10141,6 @@ components:
- deleted
title: VectorStoreDeleteResponse
description: Response from deleting a vector store.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
type: object
properties:
@@ -10606,7 +10598,9 @@ components:
description: >-
Object type identifier for the search results page
search_query:
type: string
type: array
items:
type: string
description: >-
The original search query that was executed
data:


@@ -47,7 +47,7 @@ RUN set -eux; \
exit 1; \
fi
RUN pip install --no-cache-dir uv
RUN pip install --no-cache uv
ENV UV_SYSTEM_PYTHON=1
ENV INSTALL_MODE=${INSTALL_MODE}
@@ -72,7 +72,7 @@ RUN set -eux; \
echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
exit 1; \
fi; \
uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
uv pip install --no-cache -e "$LLAMA_STACK_CLIENT_DIR"; \
fi;
# Install llama-stack
@@ -88,22 +88,22 @@ RUN set -eux; \
fi; \
if [ -n "$SAVED_UV_EXTRA_INDEX_URL" ] && [ -n "$SAVED_UV_INDEX_STRATEGY" ]; then \
UV_EXTRA_INDEX_URL="$SAVED_UV_EXTRA_INDEX_URL" UV_INDEX_STRATEGY="$SAVED_UV_INDEX_STRATEGY" \
uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \
uv pip install --no-cache -e "$LLAMA_STACK_DIR"; \
else \
uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \
uv pip install --no-cache -e "$LLAMA_STACK_DIR"; \
fi; \
elif [ "$INSTALL_MODE" = "test-pypi" ]; then \
uv pip install --no-cache-dir fastapi libcst; \
uv pip install --no-cache fastapi libcst; \
if [ -n "$TEST_PYPI_VERSION" ]; then \
uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \
uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \
else \
uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \
uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \
fi; \
else \
if [ -n "$PYPI_VERSION" ]; then \
uv pip install --no-cache-dir "llama-stack==$PYPI_VERSION"; \
uv pip install --no-cache "llama-stack==$PYPI_VERSION"; \
else \
uv pip install --no-cache-dir llama-stack; \
uv pip install --no-cache llama-stack; \
fi; \
fi;
@@ -117,7 +117,7 @@ RUN set -eux; \
fi; \
deps="$(llama stack list-deps "$DISTRO_NAME")"; \
if [ -n "$deps" ]; then \
printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache-dir; \
printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache; \
fi
# Cleanup


@@ -163,7 +163,41 @@ docker run \
--port $LLAMA_STACK_PORT
```
### Via venv
The container will run the distribution with a SQLite store by default. This store is used for the following components:
- Metadata store: store metadata about the models, providers, etc.
- Inference store: store responses from the inference provider
- Agents store: store agent configurations (sessions, turns, etc.)
- Agents Responses store: store responses from the agents
However, you can use PostgreSQL instead by running the `starter::run-with-postgres-store.yaml` configuration:
```bash
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-e OPENAI_API_KEY=your_openai_key \
-e FIREWORKS_API_KEY=your_fireworks_key \
-e TOGETHER_API_KEY=your_together_key \
-e POSTGRES_HOST=your_postgres_host \
-e POSTGRES_PORT=your_postgres_port \
-e POSTGRES_DB=your_postgres_db \
-e POSTGRES_USER=your_postgres_user \
-e POSTGRES_PASSWORD=your_postgres_password \
llamastack/distribution-starter \
starter::run-with-postgres-store.yaml
```
Postgres environment variables:
- `POSTGRES_HOST`: Postgres host (default: `localhost`)
- `POSTGRES_PORT`: Postgres port (default: `5432`)
- `POSTGRES_DB`: Postgres database name (default: `llamastack`)
- `POSTGRES_USER`: Postgres username (default: `llamastack`)
- `POSTGRES_PASSWORD`: Postgres password (default: `llamastack`)
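As a quick connectivity check for these variables before starting the server, a hedged sketch assuming `psycopg2-binary` is available (it is added to `additional_pip_packages` elsewhere in this commit):

```python
import os

import psycopg2  # provided by psycopg2-binary

conn = psycopg2.connect(
    host=os.environ.get("POSTGRES_HOST", "localhost"),
    port=int(os.environ.get("POSTGRES_PORT", "5432")),
    dbname=os.environ.get("POSTGRES_DB", "llamastack"),
    user=os.environ.get("POSTGRES_USER", "llamastack"),
    password=os.environ.get("POSTGRES_PASSWORD", "llamastack"),
)
print("connected, server version:", conn.server_version)
conn.close()
```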
### Via Conda or venv
Ensure you have configured the starter distribution using the environment variables explained above.
@@ -171,8 +205,11 @@ Ensure you have configured the starter distribution using the environment variab
# Install dependencies for the starter distribution
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
# Run the server
# Run the server (with SQLite - default)
uv run --with llama-stack llama stack run starter
# Or run with PostgreSQL
uv run --with llama-stack llama stack run starter::run-with-postgres-store.yaml
```
## Example Usage


@@ -16,7 +16,7 @@ Passthrough inference provider for connecting to any external inference service
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider |
| `url` | `<class 'str'>` | No | | The URL for the passthrough endpoint |
## Sample Configuration


@@ -9260,6 +9260,70 @@ components:
- metadata
title: VectorStoreObject
description: OpenAI Vector Store object.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreRequestWithExtraBody":
type: object
properties:
@@ -9285,15 +9349,7 @@ components:
description: >-
(Optional) Expiration policy for the vector store
chunking_strategy:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
$ref: '#/components/schemas/VectorStoreChunkingStrategy'
description: >-
(Optional) Strategy for splitting files into chunks
metadata:
@@ -9369,70 +9425,6 @@ components:
- deleted
title: VectorStoreDeleteResponse
description: Response from deleting a vector store.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
type: object
properties:
@@ -9890,7 +9882,9 @@ components:
description: >-
Object type identifier for the search results page
search_query:
type: string
type: array
items:
type: string
description: >-
The original search query that was executed
data:


@@ -9976,6 +9976,70 @@ components:
- metadata
title: VectorStoreObject
description: OpenAI Vector Store object.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreRequestWithExtraBody":
type: object
properties:
@@ -10001,15 +10065,7 @@ components:
description: >-
(Optional) Expiration policy for the vector store
chunking_strategy:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
$ref: '#/components/schemas/VectorStoreChunkingStrategy'
description: >-
(Optional) Strategy for splitting files into chunks
metadata:
@@ -10085,70 +10141,6 @@ components:
- deleted
title: VectorStoreDeleteResponse
description: Response from deleting a vector store.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
type: object
properties:
@@ -10606,7 +10598,9 @@ components:
description: >-
Object type identifier for the search results page
search_query:
type: string
type: array
items:
type: string
description: >-
The original search query that was executed
data:

scripts/cleanup_recordings.py (new executable file, 272 lines)

@@ -0,0 +1,272 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Clean up unused test recordings based on CI test collection.
This script:
1. Reads CI matrix definitions from tests/integration/ci_matrix.json (default + scheduled overrides)
2. Uses pytest --collect-only with --json-report to gather all test IDs that run in CI
3. Compares against existing recordings to identify unused ones
4. Optionally deletes unused recordings
Usage:
# Dry run - see what would be deleted
./scripts/cleanup_recordings.py
# Save manifest of CI test IDs for inspection
./scripts/cleanup_recordings.py --manifest ci_tests.txt
# Actually delete unused recordings
./scripts/cleanup_recordings.py --delete
"""
import argparse
import json
import os
import subprocess
import tempfile
from collections import defaultdict
from pathlib import Path
REPO_ROOT = Path(__file__).parent.parent
# Load CI matrix from JSON file
CI_MATRIX_FILE = REPO_ROOT / "tests/integration/ci_matrix.json"
with open(CI_MATRIX_FILE) as f:
_matrix_config = json.load(f)
DEFAULT_CI_MATRIX: list[dict[str, str]] = _matrix_config["default"]
SCHEDULED_MATRICES: dict[str, list[dict[str, str]]] = _matrix_config.get("schedules", {})
def _unique_configs(entries):
seen: set[tuple[str, str]] = set()
for entry in entries:
suite = entry["suite"]
setup = entry["setup"]
key = (suite, setup)
if key in seen:
continue
seen.add(key)
yield {"suite": suite, "setup": setup}
def iter_all_ci_configs() -> list[dict[str, str]]:
"""Return unique CI configs across default and scheduled matrices."""
combined = list(DEFAULT_CI_MATRIX)
for configs in SCHEDULED_MATRICES.values():
combined.extend(configs)
return list(_unique_configs(combined))
def collect_ci_tests():
"""Collect all test IDs that would run in CI using --collect-only with JSON output."""
all_test_ids = set()
configs = iter_all_ci_configs()
for config in configs:
print(f"Collecting tests for suite={config['suite']}, setup={config['setup']}...")
# Create a temporary file for JSON report
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
json_report_file = f.name
try:
# Configure environment for collection run
env = os.environ.copy()
env["PYTEST_ADDOPTS"] = f"--json-report --json-report-file={json_report_file}"
repo_path = str(REPO_ROOT)
existing_path = env.get("PYTHONPATH", "")
env["PYTHONPATH"] = f"{repo_path}{os.pathsep}{existing_path}" if existing_path else repo_path
result = subprocess.run(
[
"./scripts/integration-tests.sh",
"--collect-only",
"--suite",
config["suite"],
"--setup",
config["setup"],
],
capture_output=True,
text=True,
cwd=REPO_ROOT,
env=env,
)
if result.returncode != 0:
raise RuntimeError(
"Test collection failed.\n"
f"Command: {' '.join(result.args)}\n"
f"stdout:\n{result.stdout}\n"
f"stderr:\n{result.stderr}"
)
# Parse JSON report to extract test IDs
try:
with open(json_report_file) as f:
report = json.load(f)
# The "collectors" field contains collected test items
# Each collector has a "result" array with test node IDs
for collector in report.get("collectors", []):
for item in collector.get("result", []):
# The "nodeid" field is the test ID
if "nodeid" in item:
all_test_ids.add(item["nodeid"])
print(f" Collected {len(all_test_ids)} test IDs so far")
except (json.JSONDecodeError, FileNotFoundError) as e:
print(f" Warning: Failed to parse JSON report: {e}")
continue
finally:
# Clean up temp file
if os.path.exists(json_report_file):
os.unlink(json_report_file)
print(f"\nTotal unique test IDs collected: {len(all_test_ids)}")
return all_test_ids, configs
def get_base_test_id(test_id: str) -> str:
"""Extract base test ID without parameterization.
Example:
'tests/integration/inference/test_foo.py::test_bar[param1-param2]'
-> 'tests/integration/inference/test_foo.py::test_bar'
"""
return test_id.split("[")[0] if "[" in test_id else test_id
def find_all_recordings():
"""Find all recording JSON files."""
return list((REPO_ROOT / "tests/integration").rglob("recordings/*.json"))
def analyze_recordings(ci_test_ids, dry_run=True):
"""Analyze recordings and identify unused ones."""
# Use full test IDs with parameterization for exact matching
all_recordings = find_all_recordings()
print(f"\nTotal recording files: {len(all_recordings)}")
# Categorize recordings
used_recordings = []
unused_recordings = []
shared_recordings = [] # model-list endpoints without test_id
parse_errors = []
for json_file in all_recordings:
try:
with open(json_file) as f:
data = json.load(f)
test_id = data.get("test_id", "")
if not test_id:
# Shared/infrastructure recordings (model lists, etc)
shared_recordings.append(json_file)
continue
# Match exact test_id (with full parameterization)
if test_id in ci_test_ids:
used_recordings.append(json_file)
else:
unused_recordings.append((json_file, test_id))
except Exception as e:
parse_errors.append((json_file, str(e)))
# Print summary
print("\nRecording Analysis:")
print(f" Used in CI: {len(used_recordings)}")
print(f" Shared (no ID): {len(shared_recordings)}")
print(f" UNUSED: {len(unused_recordings)}")
print(f" Parse errors: {len(parse_errors)}")
if unused_recordings:
print("\nUnused recordings by test:")
# Group by base test ID
by_test = defaultdict(list)
for file, test_id in unused_recordings:
base = get_base_test_id(test_id)
by_test[base].append(file)
for base_test, files in sorted(by_test.items()):
print(f"\n {base_test}")
print(f" ({len(files)} recording(s))")
for f in files[:3]:
print(f" - {f.relative_to(REPO_ROOT / 'tests/integration')}")
if len(files) > 3:
print(f" ... and {len(files) - 3} more")
if parse_errors:
print("\nParse errors:")
for file, error in parse_errors[:5]:
print(f" {file.relative_to(REPO_ROOT)}: {error}")
if len(parse_errors) > 5:
print(f" ... and {len(parse_errors) - 5} more")
# Perform cleanup
if not dry_run:
print(f"\nDeleting {len(unused_recordings)} unused recordings...")
for file, _ in unused_recordings:
file.unlink()
print(f" Deleted: {file.relative_to(REPO_ROOT / 'tests/integration')}")
print("✅ Cleanup complete")
else:
print("\n(Dry run - no files deleted)")
print("\nTo delete these files, run with --delete")
return len(unused_recordings)
def main():
parser = argparse.ArgumentParser(
description="Clean up unused test recordings based on CI test collection",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=__doc__,
)
parser.add_argument("--delete", action="store_true", help="Actually delete unused recordings (default is dry-run)")
parser.add_argument("--manifest", help="Save collected test IDs to file (optional)")
args = parser.parse_args()
print("=" * 60)
print("Recording Cleanup Utility")
print("=" * 60)
ci_configs = iter_all_ci_configs()
print(f"\nDetected CI configurations: {len(ci_configs)}")
for config in ci_configs:
print(f" - suite={config['suite']}, setup={config['setup']}")
# Collect test IDs from CI configurations
ci_test_ids, _ = collect_ci_tests()
if args.manifest:
with open(args.manifest, "w") as f:
for test_id in sorted(ci_test_ids):
f.write(f"{test_id}\n")
print(f"\nSaved test IDs to: {args.manifest}")
# Analyze and cleanup
unused_count = analyze_recordings(ci_test_ids, dry_run=not args.delete)
print("\n" + "=" * 60)
if unused_count > 0 and not args.delete:
print("Run with --delete to remove unused recordings")
if __name__ == "__main__":
main()

scripts/generate_ci_matrix.py (new executable file, 61 lines)

@@ -0,0 +1,61 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Generate CI test matrix from ci_matrix.json with schedule/input overrides.
This script is used by .github/workflows/integration-tests.yml to generate
the test matrix dynamically based on the CI_MATRIX definition.
"""
import json
from pathlib import Path
CI_MATRIX_FILE = Path(__file__).parent.parent / "tests/integration/ci_matrix.json"
with open(CI_MATRIX_FILE) as f:
matrix_config = json.load(f)
DEFAULT_MATRIX = matrix_config["default"]
SCHEDULE_MATRICES: dict[str, list[dict[str, str]]] = matrix_config.get("schedules", {})
def generate_matrix(schedule="", test_setup=""):
"""
Generate test matrix based on schedule or manual input.
Args:
schedule: GitHub cron schedule string (e.g., "1 0 * * 0" for weekly)
test_setup: Manual test setup input (e.g., "ollama-vision")
Returns:
Matrix configuration as JSON string
"""
# Weekly scheduled test matrices
if schedule and schedule in SCHEDULE_MATRICES:
matrix = SCHEDULE_MATRICES[schedule]
# Manual input for specific setup
elif test_setup == "ollama-vision":
matrix = [{"suite": "vision", "setup": "ollama-vision"}]
# Default: use JSON-defined matrix
else:
matrix = DEFAULT_MATRIX
# GitHub Actions expects {"include": [...]} format
return json.dumps({"include": matrix})
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Generate CI test matrix")
parser.add_argument("--schedule", default="", help="GitHub schedule cron string")
parser.add_argument("--test-setup", default="", help="Manual test setup input")
args = parser.parse_args()
print(generate_matrix(args.schedule, args.test_setup))
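For illustration, calling `generate_matrix` directly (the workflow invokes the script via its CLI instead). The weekly output assumes `ci_matrix.json` maps the `1 0 * * 0` schedule to vllm+base, as the workflow comments above indicate:

```python
from generate_ci_matrix import generate_matrix  # import path is illustrative

# Default matrix (no schedule, no manual setup) comes straight from ci_matrix.json.
print(generate_matrix())

# Weekly schedule -> vllm+base, assuming that mapping in ci_matrix.json:
print(generate_matrix(schedule="1 0 * * 0"))
# {"include": [{"suite": "base", "setup": "vllm"}]}

# Manual ollama-vision input is hardcoded in the function:
print(generate_matrix(test_setup="ollama-vision"))
# {"include": [{"suite": "vision", "setup": "ollama-vision"}]}
```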


@@ -227,14 +227,16 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
echo "=== Starting Llama Stack Server ==="
export LLAMA_STACK_LOG_WIDTH=120
# Configure telemetry collector for server mode
# Use a fixed port for the OTEL collector so the server can connect to it
COLLECTOR_PORT=4317
export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
export OTEL_BSP_SCHEDULE_DELAY="200"
export OTEL_BSP_EXPORT_TIMEOUT="2000"
# Configure telemetry collector for server mode
# Use a fixed port for the OTEL collector so the server can connect to it
COLLECTOR_PORT=4317
export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
# Disabled: https://github.com/llamastack/llama-stack/issues/4089
#export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
export OTEL_BSP_SCHEDULE_DELAY="200"
export OTEL_BSP_EXPORT_TIMEOUT="2000"
export OTEL_METRIC_EXPORT_INTERVAL="200"
# remove "server:" from STACK_CONFIG
stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://')
@@ -336,7 +338,11 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
DOCKER_ENV_VARS=""
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}"
# Disabled: https://github.com/llamastack/llama-stack/issues/4089
#DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_METRIC_EXPORT_INTERVAL=200"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_SCHEDULE_DELAY=200"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_EXPORT_TIMEOUT=2000"
# Pass through API keys if they exist
[ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
@@ -349,6 +355,10 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
[ -n "${OLLAMA_URL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
[ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL"
if [[ "$TEST_SETUP" == "vllm" ]]; then
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e VLLM_URL=http://localhost:8000/v1"
fi
# Determine the actual image name (may have localhost/ prefix)
IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1)
if [[ -z "$IMAGE_NAME" ]]; then
@@ -401,11 +411,6 @@ fi
echo "=== Running Integration Tests ==="
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
# Additional exclusions for vllm setup
if [[ "$TEST_SETUP" == "vllm" ]]; then
EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
fi
PYTEST_PATTERN="not( $EXCLUDE_TESTS )"
if [[ -n "$TEST_PATTERN" ]]; then
PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN"


@@ -6,26 +6,22 @@
from .conversations import (
Conversation,
ConversationCreateRequest,
ConversationDeletedResource,
ConversationItem,
ConversationItemCreateRequest,
ConversationItemDeletedResource,
ConversationItemList,
Conversations,
ConversationUpdateRequest,
Metadata,
)
__all__ = [
"Conversation",
"ConversationCreateRequest",
"ConversationDeletedResource",
"ConversationItem",
"ConversationItemCreateRequest",
"ConversationItemDeletedResource",
"ConversationItemList",
"Conversations",
"ConversationUpdateRequest",
"Metadata",
]


@@ -102,32 +102,6 @@ register_schema(ConversationItem, name="ConversationItem")
# ]
@json_schema_type
class ConversationCreateRequest(BaseModel):
"""Request body for creating a conversation."""
items: list[ConversationItem] | None = Field(
default=[],
description="Initial items to include in the conversation context. You may add up to 20 items at a time.",
max_length=20,
)
metadata: Metadata | None = Field(
default={},
description="Set of 16 key-value pairs that can be attached to an object. Useful for storing additional information",
max_length=16,
)
@json_schema_type
class ConversationUpdateRequest(BaseModel):
"""Request body for updating a conversation."""
metadata: Metadata = Field(
...,
description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters.",
)
@json_schema_type
class ConversationDeletedResource(BaseModel):
"""Response for deleted conversation."""


@@ -260,7 +260,7 @@ class VectorStoreSearchResponsePage(BaseModel):
"""
object: str = "vector_store.search_results.page"
search_query: str
search_query: list[str]
data: list[VectorStoreSearchResponse]
has_more: bool = False
next_page: str | None = None
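A minimal sketch of the updated field, using only the fields visible in this hunk:

```python
from llama_stack.apis.vector_io import VectorStoreSearchResponsePage  # path per the router import below

# search_query is now a list of strings rather than a single string.
page = VectorStoreSearchResponsePage(
    search_query=["deployment runbook"],  # illustrative query
    data=[],
)
print(page.object)  # "vector_store.search_results.page"
```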
@@ -478,7 +478,7 @@ class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"):
name: str | None = None
file_ids: list[str] | None = None
expires_after: dict[str, Any] | None = None
chunking_strategy: dict[str, Any] | None = None
chunking_strategy: VectorStoreChunkingStrategy | None = None
metadata: dict[str, Any] | None = None


@@ -46,6 +46,10 @@ class StackListDeps(Subcommand):
def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None:
# always keep implementation completely silo-ed away from CLI so CLI
# can be fast to load and reduces dependencies
if not args.config and not args.providers:
self.parser.print_help()
self.parser.exit()
from ._list_deps import run_stack_list_deps_command
return run_stack_list_deps_command(args)


@@ -9,48 +9,69 @@ from pathlib import Path
from llama_stack.cli.subcommand import Subcommand
from llama_stack.cli.table import print_table
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
class StackListBuilds(Subcommand):
"""List built stacks in .llama/distributions directory"""
"""List available distributions (both built-in and custom)"""
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"list",
prog="llama stack list",
description="list the build stacks",
description="list available distributions",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._list_stack_command)
def _get_distribution_dirs(self) -> dict[str, Path]:
"""Return a dictionary of distribution names and their paths"""
distributions = {}
dist_dir = Path.home() / ".llama" / "distributions"
def _get_distribution_dirs(self) -> dict[str, tuple[Path, str]]:
"""Return a dictionary of distribution names and their paths with source type
Returns:
dict mapping distro name to (path, source_type) where source_type is 'built-in' or 'custom'
"""
distributions = {}
# Get built-in distributions from source code
distro_dir = Path(__file__).parent.parent.parent / "distributions"
if distro_dir.exists():
for stack_dir in distro_dir.iterdir():
if stack_dir.is_dir() and not stack_dir.name.startswith(".") and not stack_dir.name.startswith("__"):
distributions[stack_dir.name] = (stack_dir, "built-in")
# Get custom/run distributions from ~/.llama/distributions
# These override built-in ones if they have the same name
if DISTRIBS_BASE_DIR.exists():
for stack_dir in DISTRIBS_BASE_DIR.iterdir():
if stack_dir.is_dir() and not stack_dir.name.startswith("."):
# Clean up the name (remove llamastack- prefix if present)
name = stack_dir.name.replace("llamastack-", "")
distributions[name] = (stack_dir, "custom")
if dist_dir.exists():
for stack_dir in dist_dir.iterdir():
if stack_dir.is_dir():
distributions[stack_dir.name] = stack_dir
return distributions
def _list_stack_command(self, args: argparse.Namespace) -> None:
distributions = self._get_distribution_dirs()
if not distributions:
print("No stacks found in ~/.llama/distributions")
print("No distributions found")
return
headers = ["Stack Name", "Path"]
headers.extend(["Build Config", "Run Config"])
headers = ["Stack Name", "Source", "Path", "Build Config", "Run Config"]
rows = []
for name, path in distributions.items():
row = [name, str(path)]
for name, (path, source_type) in sorted(distributions.items()):
row = [name, source_type, str(path)]
# Check for build and run config files
build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No"
run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No"
# For built-in distributions, configs are named build.yaml and run.yaml
# For custom distributions, configs are named {name}-build.yaml and {name}-run.yaml
if source_type == "built-in":
build_config = "Yes" if (path / "build.yaml").exists() else "No"
run_config = "Yes" if (path / "run.yaml").exists() else "No"
else:
build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No"
run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No"
row.extend([build_config, run_config])
rows.append(row)
print_table(rows, headers, separate_rows=True)


@@ -20,6 +20,8 @@ from llama_stack.apis.vector_io import (
SearchRankingOptions,
VectorIO,
VectorStoreChunkingStrategy,
VectorStoreChunkingStrategyStatic,
VectorStoreChunkingStrategyStaticConfig,
VectorStoreDeleteResponse,
VectorStoreFileBatchObject,
VectorStoreFileContentsResponse,
@@ -167,6 +169,13 @@ class VectorIORouter(VectorIO):
if embedding_dimension is not None:
params.model_extra["embedding_dimension"] = embedding_dimension
# Set chunking strategy explicitly if not provided
if params.chunking_strategy is None or params.chunking_strategy.type == "auto":
# actualize the chunking strategy to static
params.chunking_strategy = VectorStoreChunkingStrategyStatic(
static=VectorStoreChunkingStrategyStaticConfig()
)
return await provider.openai_create_vector_store(params)
async def openai_list_vector_stores(
@@ -283,6 +292,8 @@ class VectorIORouter(VectorIO):
chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}")
if chunking_strategy is None or chunking_strategy.type == "auto":
chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig())
provider = await self.routing_table.get_provider_impl(vector_store_id)
return await provider.openai_attach_file_to_vector_store(
vector_store_id=vector_store_id,


@@ -427,6 +427,7 @@ _GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
"counters": {},
"gauges": {},
"up_down_counters": {},
"histograms": {},
}
_global_lock = threading.Lock()
_TRACER_PROVIDER = None
@@ -540,6 +541,16 @@ class Telemetry:
)
return cast(metrics.ObservableGauge, _GLOBAL_STORAGE["gauges"][name])
def _get_or_create_histogram(self, name: str, unit: str) -> metrics.Histogram:
assert self.meter is not None
if name not in _GLOBAL_STORAGE["histograms"]:
_GLOBAL_STORAGE["histograms"][name] = self.meter.create_histogram(
name=name,
unit=unit,
description=f"Histogram for {name}",
)
return cast(metrics.Histogram, _GLOBAL_STORAGE["histograms"][name])
def _log_metric(self, event: MetricEvent) -> None:
# Add metric as an event to the current span
try:
@@ -571,7 +582,16 @@ class Telemetry:
# Log to OpenTelemetry meter if available
if self.meter is None:
return
if isinstance(event.value, int):
# Use histograms for token-related metrics (per-request measurements)
# Use counters for other cumulative metrics
token_metrics = {"prompt_tokens", "completion_tokens", "total_tokens"}
if event.metric in token_metrics:
# Token metrics are per-request measurements, use histogram
histogram = self._get_or_create_histogram(event.metric, event.unit)
histogram.record(event.value, attributes=_clean_attributes(event.attributes))
elif isinstance(event.value, int):
counter = self._get_or_create_counter(event.metric, event.unit)
counter.add(event.value, attributes=_clean_attributes(event.attributes))
elif isinstance(event.value, float):


@@ -52,7 +52,17 @@ def resolve_config_or_distro(
logger.debug(f"Using distribution: {distro_config}")
return distro_config
# Strategy 3: Try as built distribution name
# Strategy 3: Try as a distro config reference of the form <distro>::<config>
# eg: starter::run-with-postgres-store.yaml
# The :: separator avoids confusion with a filesystem path
if "::" in config_or_distro:
distro_name, config_name = config_or_distro.split("::")
distro_config = _get_distro_config_path(distro_name, config_name)
if distro_config.exists():
logger.info(f"Using distribution: {distro_config}")
return distro_config
# Strategy 4: Try as built distribution name
distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
if distrib_config.exists():
logger.debug(f"Using built distribution: {distrib_config}")
@@ -63,13 +73,15 @@ def resolve_config_or_distro(
logger.debug(f"Using built distribution: {distrib_config}")
return distrib_config
# Strategy 4: Failed - provide helpful error
# Strategy 5: Failed - provide helpful error
raise ValueError(_format_resolution_error(config_or_distro, mode))
def _get_distro_config_path(distro_name: str, mode: Mode) -> Path:
def _get_distro_config_path(distro_name: str, mode: str) -> Path:
"""Get the config file path for a distro."""
return DISTRO_DIR / distro_name / f"{mode}.yaml"
if not mode.endswith(".yaml"):
mode = f"{mode}.yaml"
return DISTRO_DIR / distro_name / mode
def _format_resolution_error(config_or_distro: str, mode: Mode) -> str:


@@ -84,6 +84,15 @@ def run_command(command: list[str]) -> int:
text=True,
check=False,
)
# Print stdout and stderr if command failed
if result.returncode != 0:
log.error(f"Command {' '.join(command)} failed with returncode {result.returncode}")
if result.stdout:
log.error(f"STDOUT: {result.stdout}")
if result.stderr:
log.error(f"STDERR: {result.stderr}")
return result.returncode
except subprocess.SubprocessError as e:
log.error(f"Subprocess error: {e}")


@@ -56,4 +56,5 @@ image_type: venv
additional_pip_packages:
- aiosqlite
- asyncpg
- psycopg2-binary
- sqlalchemy[asyncio]


@@ -13,5 +13,6 @@ from ..starter.starter import get_distribution_template as get_starter_distribut
def get_distribution_template() -> DistributionTemplate:
template = get_starter_distribution_template(name="ci-tests")
template.description = "CI tests for Llama Stack"
template.run_configs.pop("run-with-postgres-store.yaml", None)
return template


@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .postgres_demo import get_distribution_template # noqa: F401


@@ -1,23 +0,0 @@
version: 2
distribution_spec:
description: Quick start template for running Llama Stack with several popular providers
providers:
inference:
- provider_type: remote::vllm
- provider_type: inline::sentence-transformers
vector_io:
- provider_type: remote::chromadb
safety:
- provider_type: inline::llama-guard
agents:
- provider_type: inline::meta-reference
tool_runtime:
- provider_type: remote::brave-search
- provider_type: remote::tavily-search
- provider_type: inline::rag-runtime
- provider_type: remote::model-context-protocol
image_type: venv
additional_pip_packages:
- asyncpg
- psycopg2-binary
- sqlalchemy[asyncio]


@@ -1,125 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.models import ModelType
from llama_stack.core.datatypes import (
BuildProvider,
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.distributions.template import (
DistributionTemplate,
RunConfigSettings,
)
from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
def get_distribution_template() -> DistributionTemplate:
inference_providers = [
Provider(
provider_id="vllm-inference",
provider_type="remote::vllm",
config=VLLMInferenceAdapterConfig.sample_run_config(
url="${env.VLLM_URL:=http://localhost:8000/v1}",
),
),
]
providers = {
"inference": [
BuildProvider(provider_type="remote::vllm"),
BuildProvider(provider_type="inline::sentence-transformers"),
],
"vector_io": [BuildProvider(provider_type="remote::chromadb")],
"safety": [BuildProvider(provider_type="inline::llama-guard")],
"agents": [BuildProvider(provider_type="inline::meta-reference")],
"tool_runtime": [
BuildProvider(provider_type="remote::brave-search"),
BuildProvider(provider_type="remote::tavily-search"),
BuildProvider(provider_type="inline::rag-runtime"),
BuildProvider(provider_type="remote::model-context-protocol"),
],
}
name = "postgres-demo"
vector_io_providers = [
Provider(
provider_id="${env.ENABLE_CHROMADB:+chromadb}",
provider_type="remote::chromadb",
config=ChromaVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
url="${env.CHROMADB_URL:=}",
),
),
]
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::rag",
provider_id="rag-runtime",
),
]
default_models = [
ModelInput(
model_id="${env.INFERENCE_MODEL}",
provider_id="vllm-inference",
)
]
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)
embedding_model = ModelInput(
model_id="nomic-embed-text-v1.5",
provider_id=embedding_provider.provider_id,
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 768,
},
)
return DistributionTemplate(
name=name,
distro_type="self_hosted",
description="Quick start template for running Llama Stack with several popular providers",
container_image=None,
template_path=None,
providers=providers,
available_models_by_provider={},
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": inference_providers + [embedding_provider],
"vector_io": vector_io_providers,
},
default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
storage_backends={
"kv_default": PostgresKVStoreConfig.sample_run_config(
table_name="llamastack_kvstore",
),
"sql_default": PostgresSqlStoreConfig.sample_run_config(),
},
),
},
run_config_env_vars={
"LLAMA_STACK_PORT": (
"8321",
"Port for the Llama Stack distribution server",
),
},
)


@@ -57,4 +57,5 @@ image_type: venv
additional_pip_packages:
- aiosqlite
- asyncpg
- psycopg2-binary
- sqlalchemy[asyncio]


@@ -0,0 +1,281 @@
version: 2
image_name: starter-gpu
apis:
- agents
- batches
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras
config:
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:=http://localhost:11434}
- provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi
config:
url: ${env.TGI_URL:=}
- provider_id: fireworks
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock
provider_type: remote::bedrock
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai
provider_type: remote::openai
config:
api_key: ${env.OPENAI_API_KEY:=}
base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
- provider_id: anthropic
provider_type: remote::anthropic
config:
api_key: ${env.ANTHROPIC_API_KEY:=}
- provider_id: gemini
provider_type: remote::gemini
config:
api_key: ${env.GEMINI_API_KEY:=}
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
provider_type: remote::vertexai
config:
project: ${env.VERTEX_AI_PROJECT:=}
location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova
provider_type: remote::sambanova
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure
config:
api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
persistence:
namespace: vector_io::faiss
backend: kv_default
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
persistence:
namespace: vector_io::sqlite_vec
backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
persistence:
namespace: vector_io::milvus
backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
persistence:
namespace: vector_io::pgvector
backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
metadata_store:
table_name: files_metadata
backend: sql_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
- provider_id: code-scanner
provider_type: inline::code-scanner
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
responses_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
post_training:
- provider_id: huggingface-gpu
provider_type: inline::huggingface-gpu
config:
checkpoint_format: huggingface
distributed_backend: null
device: cpu
dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
batches:
- provider_id: reference
provider_type: inline::reference
config:
kvstore:
namespace: batches
backend: kv_postgres
storage:
backends:
kv_postgres:
type: kv_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
sql_postgres:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
stores:
metadata:
namespace: registry
backend: kv_postgres
inference:
table_name: inference_store
backend: sql_postgres
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_postgres
prompts:
namespace: prompts
backend: kv_postgres
registered_resources:
models: []
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups: []
server:
port: 8321
telemetry:
enabled: true

View file

@@ -57,4 +57,5 @@ image_type: venv
additional_pip_packages:
- aiosqlite
- asyncpg
- psycopg2-binary
- sqlalchemy[asyncio]

View file

@@ -0,0 +1,278 @@
version: 2
image_name: starter
apis:
- agents
- batches
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras
config:
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:=http://localhost:11434}
- provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi
config:
url: ${env.TGI_URL:=}
- provider_id: fireworks
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock
provider_type: remote::bedrock
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai
provider_type: remote::openai
config:
api_key: ${env.OPENAI_API_KEY:=}
base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
- provider_id: anthropic
provider_type: remote::anthropic
config:
api_key: ${env.ANTHROPIC_API_KEY:=}
- provider_id: gemini
provider_type: remote::gemini
config:
api_key: ${env.GEMINI_API_KEY:=}
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
provider_type: remote::vertexai
config:
project: ${env.VERTEX_AI_PROJECT:=}
location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova
provider_type: remote::sambanova
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure
config:
api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
persistence:
namespace: vector_io::faiss
backend: kv_default
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
persistence:
namespace: vector_io::sqlite_vec
backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
persistence:
namespace: vector_io::milvus
backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
persistence:
namespace: vector_io::pgvector
backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
metadata_store:
table_name: files_metadata
backend: sql_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
- provider_id: code-scanner
provider_type: inline::code-scanner
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
responses_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
post_training:
- provider_id: torchtune-cpu
provider_type: inline::torchtune-cpu
config:
checkpoint_format: meta
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
batches:
- provider_id: reference
provider_type: inline::reference
config:
kvstore:
namespace: batches
backend: kv_postgres
storage:
backends:
kv_postgres:
type: kv_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
sql_postgres:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
stores:
metadata:
namespace: registry
backend: kv_postgres
inference:
table_name: inference_store
backend: sql_postgres
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_postgres
prompts:
namespace: prompts
backend: kv_postgres
registered_resources:
models: []
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups: []
server:
port: 8321
telemetry:
enabled: true

View file

@@ -17,6 +17,11 @@ from llama_stack.core.datatypes import (
ToolGroupInput,
VectorStoresConfig,
)
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
SqlStoreReference,
)
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
from llama_stack.providers.datatypes import RemoteProviderSpec
@@ -36,6 +41,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
)
from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
@@ -181,6 +187,62 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
provider_shield_id="${env.CODE_SCANNER_MODEL:=}",
),
]
postgres_config = PostgresSqlStoreConfig.sample_run_config()
default_overrides = {
"inference": remote_inference_providers + [embedding_provider],
"vector_io": [
Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="sqlite-vec",
provider_type="inline::sqlite-vec",
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="${env.MILVUS_URL:+milvus}",
provider_type="inline::milvus",
config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="${env.CHROMADB_URL:+chromadb}",
provider_type="remote::chromadb",
config=ChromaVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}/",
url="${env.CHROMADB_URL:=}",
),
),
Provider(
provider_id="${env.PGVECTOR_DB:+pgvector}",
provider_type="remote::pgvector",
config=PGVectorVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
db="${env.PGVECTOR_DB:=}",
user="${env.PGVECTOR_USER:=}",
password="${env.PGVECTOR_PASSWORD:=}",
),
),
Provider(
provider_id="${env.QDRANT_URL:+qdrant}",
provider_type="remote::qdrant",
config=QdrantVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
url="${env.QDRANT_URL:=}",
),
),
Provider(
provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
provider_type="remote::weaviate",
config=WeaviateVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
),
),
],
"files": [files_provider],
}
return DistributionTemplate(
name=name,
@@ -189,64 +251,10 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
container_image=None,
template_path=None,
providers=providers,
additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())),
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": remote_inference_providers + [embedding_provider],
"vector_io": [
Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="sqlite-vec",
provider_type="inline::sqlite-vec",
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="${env.MILVUS_URL:+milvus}",
provider_type="inline::milvus",
config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="${env.CHROMADB_URL:+chromadb}",
provider_type="remote::chromadb",
config=ChromaVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}/",
url="${env.CHROMADB_URL:=}",
),
),
Provider(
provider_id="${env.PGVECTOR_DB:+pgvector}",
provider_type="remote::pgvector",
config=PGVectorVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
db="${env.PGVECTOR_DB:=}",
user="${env.PGVECTOR_USER:=}",
password="${env.PGVECTOR_PASSWORD:=}",
),
),
Provider(
provider_id="${env.QDRANT_URL:+qdrant}",
provider_type="remote::qdrant",
config=QdrantVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
url="${env.QDRANT_URL:=}",
),
),
Provider(
provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
provider_type="remote::weaviate",
config=WeaviateVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
),
),
],
"files": [files_provider],
},
provider_overrides=default_overrides,
default_models=[],
default_tool_groups=default_tool_groups,
default_shields=default_shields,
@@ -261,6 +269,55 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
default_shield_id="llama-guard",
),
),
"run-with-postgres-store.yaml": RunConfigSettings(
provider_overrides={
**default_overrides,
"agents": [
Provider(
provider_id="meta-reference",
provider_type="inline::meta-reference",
config=dict(
persistence_store=postgres_config,
responses_store=postgres_config,
),
)
],
"batches": [
Provider(
provider_id="reference",
provider_type="inline::reference",
config=dict(
kvstore=KVStoreReference(
backend="kv_postgres",
namespace="batches",
).model_dump(exclude_none=True),
),
)
],
},
storage_backends={
"kv_postgres": PostgresKVStoreConfig.sample_run_config(),
"sql_postgres": postgres_config,
},
storage_stores={
"metadata": KVStoreReference(
backend="kv_postgres",
namespace="registry",
).model_dump(exclude_none=True),
"inference": InferenceStoreReference(
backend="sql_postgres",
table_name="inference_store",
).model_dump(exclude_none=True),
"conversations": SqlStoreReference(
backend="sql_postgres",
table_name="openai_conversations",
).model_dump(exclude_none=True),
"prompts": KVStoreReference(
backend="kv_postgres",
namespace="prompts",
).model_dump(exclude_none=True),
},
),
},
run_config_env_vars={
"LLAMA_STACK_PORT": (

View file

@@ -10,8 +10,8 @@ from .config import PassthroughImplConfig
class PassthroughProviderDataValidator(BaseModel):
url: str
api_key: str
passthrough_url: str
passthrough_api_key: str
async def get_adapter_impl(config: PassthroughImplConfig, _deps):

View file

@@ -6,7 +6,7 @@
from typing import Any
from pydantic import Field, SecretStr
from pydantic import Field
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.schema_utils import json_schema_type
@@ -19,11 +19,6 @@ class PassthroughImplConfig(RemoteInferenceProviderConfig):
description="The URL for the passthrough endpoint",
)
api_key: SecretStr | None = Field(
default=None,
description="API Key for the passthrouth endpoint",
)
@classmethod
def sample_run_config(
cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs

View file

@@ -5,9 +5,8 @@
# the root directory of this source tree.
from collections.abc import AsyncIterator
from typing import Any
from llama_stack_client import AsyncLlamaStackClient
from openai import AsyncOpenAI
from llama_stack.apis.inference import (
Inference,
@@ -20,103 +19,117 @@ from llama_stack.apis.inference import (
OpenAIEmbeddingsResponse,
)
from llama_stack.apis.models import Model
from llama_stack.core.library_client import convert_pydantic_to_json_value
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
from llama_stack.core.request_headers import NeedsRequestProviderData
from .config import PassthroughImplConfig
class PassthroughInferenceAdapter(Inference):
class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference):
def __init__(self, config: PassthroughImplConfig) -> None:
ModelRegistryHelper.__init__(self)
self.config = config
async def initialize(self) -> None:
pass
async def shutdown(self) -> None:
pass
async def unregister_model(self, model_id: str) -> None:
pass
async def register_model(self, model: Model) -> Model:
return model
def _get_client(self) -> AsyncLlamaStackClient:
passthrough_url = None
passthrough_api_key = None
provider_data = None
async def list_models(self) -> list[Model]:
"""List models by calling the downstream /v1/models endpoint."""
client = self._get_openai_client()
if self.config.url is not None:
passthrough_url = self.config.url
else:
provider_data = self.get_request_provider_data()
if provider_data is None or not provider_data.passthrough_url:
raise ValueError(
'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": <your passthrough url>}'
)
passthrough_url = provider_data.passthrough_url
response = await client.models.list()
if self.config.api_key is not None:
passthrough_api_key = self.config.api_key.get_secret_value()
else:
provider_data = self.get_request_provider_data()
if provider_data is None or not provider_data.passthrough_api_key:
raise ValueError(
'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": <your api key>}'
)
passthrough_api_key = provider_data.passthrough_api_key
# Convert from OpenAI format to Llama Stack Model format
models = []
for model_data in response.data:
downstream_model_id = model_data.id
custom_metadata = getattr(model_data, "custom_metadata", {}) or {}
return AsyncLlamaStackClient(
base_url=passthrough_url,
api_key=passthrough_api_key,
provider_data=provider_data,
# Prefix identifier with provider ID for local registry
local_identifier = f"{self.__provider_id__}/{downstream_model_id}"
model = Model(
identifier=local_identifier,
provider_id=self.__provider_id__,
provider_resource_id=downstream_model_id,
model_type=custom_metadata.get("model_type", "llm"),
metadata=custom_metadata,
)
models.append(model)
return models
async def should_refresh_models(self) -> bool:
"""Passthrough should refresh models since they come from downstream dynamically."""
return self.config.refresh_models
def _get_openai_client(self) -> AsyncOpenAI:
"""Get an AsyncOpenAI client configured for the downstream server."""
base_url = self._get_passthrough_url()
api_key = self._get_passthrough_api_key()
return AsyncOpenAI(
base_url=f"{base_url.rstrip('/')}/v1",
api_key=api_key,
)
async def openai_embeddings(
self,
params: OpenAIEmbeddingsRequestWithExtraBody,
) -> OpenAIEmbeddingsResponse:
raise NotImplementedError()
def _get_passthrough_url(self) -> str:
"""Get the passthrough URL from config or provider data."""
if self.config.url is not None:
return self.config.url
provider_data = self.get_request_provider_data()
if provider_data is None:
raise ValueError(
'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": <your passthrough url>}'
)
return provider_data.passthrough_url
def _get_passthrough_api_key(self) -> str:
"""Get the passthrough API key from config or provider data."""
if self.config.auth_credential is not None:
return self.config.auth_credential.get_secret_value()
provider_data = self.get_request_provider_data()
if provider_data is None:
raise ValueError(
'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": <your api key>}'
)
return provider_data.passthrough_api_key
async def openai_completion(
self,
params: OpenAICompletionRequestWithExtraBody,
) -> OpenAICompletion:
client = self._get_client()
model_obj = await self.model_store.get_model(params.model)
params = params.model_copy()
params.model = model_obj.provider_resource_id
"""Forward completion request to downstream using OpenAI client."""
client = self._get_openai_client()
request_params = params.model_dump(exclude_none=True)
return await client.inference.openai_completion(**request_params)
response = await client.completions.create(**request_params)
return response # type: ignore
async def openai_chat_completion(
self,
params: OpenAIChatCompletionRequestWithExtraBody,
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
client = self._get_client()
model_obj = await self.model_store.get_model(params.model)
params = params.model_copy()
params.model = model_obj.provider_resource_id
"""Forward chat completion request to downstream using OpenAI client."""
client = self._get_openai_client()
request_params = params.model_dump(exclude_none=True)
response = await client.chat.completions.create(**request_params)
return response # type: ignore
return await client.inference.openai_chat_completion(**request_params)
def cast_value_to_json_dict(self, request_params: dict[str, Any]) -> dict[str, Any]:
json_params = {}
for key, value in request_params.items():
json_input = convert_pydantic_to_json_value(value)
if isinstance(json_input, dict):
json_input = {k: v for k, v in json_input.items() if v is not None}
elif isinstance(json_input, list):
json_input = [x for x in json_input if x is not None]
new_input = []
for x in json_input:
if isinstance(x, dict):
x = {k: v for k, v in x.items() if v is not None}
new_input.append(x)
json_input = new_input
json_params[key] = json_input
return json_params
async def openai_embeddings(
self,
params: OpenAIEmbeddingsRequestWithExtraBody,
) -> OpenAIEmbeddingsResponse:
"""Forward embeddings request to downstream using OpenAI client."""
client = self._get_openai_client()
request_params = params.model_dump(exclude_none=True)
response = await client.embeddings.create(**request_params)
return response # type: ignore
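
A minimal sketch of exercising the rewritten adapter with per-request provider data, assuming a Llama Stack server on localhost:8321 (the default port in the run configs above). When the static config leaves the URL and credential unset, the adapter resolves both from the X-LlamaStack-Provider-Data header, whose keys match the validator above; the endpoint path and model name here are illustrative assumptions, not confirmed by this diff:

import json
import requests  # any HTTP client works; requests is used for brevity

# Per-request provider data, matching the keys the adapter reads when the
# static config leaves url/auth_credential unset.
provider_data = {
    "passthrough_url": "http://downstream-stack:8321",  # downstream server (assumption)
    "passthrough_api_key": "example-key",               # illustrative credential
}

resp = requests.post(
    "http://localhost:8321/v1/chat/completions",        # path is an assumption
    headers={"X-LlamaStack-Provider-Data": json.dumps(provider_data)},
    json={
        "model": "passthrough/some-model",              # hypothetical prefixed identifier
        "messages": [{"role": "user", "content": "hello"}],
    },
)
print(resp.json())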

View file

@@ -283,8 +283,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
# ...
provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}"
if "embedding" in functions:
embedding_dimension = model_spec["model_limits"]["embedding_dimension"]
context_length = model_spec["model_limits"]["max_sequence_length"]
embedding_dimension = model_spec.get("model_limits", {}).get("embedding_dimension", 0)
context_length = model_spec.get("model_limits", {}).get("max_sequence_length", 0)
embedding_metadata = {
"embedding_dimension": embedding_dimension,
"context_length": context_length,
@@ -306,10 +306,6 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
metadata={},
model_type=ModelType.llm,
)
# In theory, I guess it is possible that a model could be both an embedding model and a text chat model.
# In that case, the cache will record the generator Model object, and the list which we return will have
# both the generator Model object and the text chat Model object. That's fine because the cache is
# only used for check_model_availability() anyway.
self._model_cache[provider_resource_id] = model
models.append(model)
return models

View file

@@ -26,6 +26,7 @@ from llama_stack.apis.vector_io import (
VectorStoreChunkingStrategy,
VectorStoreChunkingStrategyAuto,
VectorStoreChunkingStrategyStatic,
VectorStoreChunkingStrategyStaticConfig,
VectorStoreContent,
VectorStoreDeleteResponse,
VectorStoreFileBatchObject,
@@ -414,6 +415,10 @@ class OpenAIVectorStoreMixin(ABC):
in_progress=0,
total=0,
)
if not params.chunking_strategy or params.chunking_strategy.type == "auto":
chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig())
else:
chunking_strategy = params.chunking_strategy
store_info: dict[str, Any] = {
"id": vector_store_id,
"object": "vector_store",
@@ -426,7 +431,7 @@ class OpenAIVectorStoreMixin(ABC):
"expires_at": None,
"last_active_at": created_at,
"file_ids": [],
"chunking_strategy": params.chunking_strategy,
"chunking_strategy": chunking_strategy.model_dump(),
}
# Add provider information to metadata if provided
@@ -637,7 +642,7 @@ class OpenAIVectorStoreMixin(ABC):
break
return VectorStoreSearchResponsePage(
search_query=search_query,
search_query=query if isinstance(query, list) else [query],
data=data,
has_more=False, # For simplicity, we don't implement pagination here
next_page=None,
@@ -647,7 +652,7 @@ class OpenAIVectorStoreMixin(ABC):
logger.error(f"Error searching vector store {vector_store_id}: {e}")
# Return empty results on error
return VectorStoreSearchResponsePage(
search_query=search_query,
search_query=query if isinstance(query, list) else [query],
data=[],
has_more=False,
next_page=None,
@@ -886,8 +891,8 @@ class OpenAIVectorStoreMixin(ABC):
# Determine pagination info
has_more = len(file_objects) > limit
first_id = file_objects[0].id if file_objects else None
last_id = file_objects[-1].id if file_objects else None
first_id = limited_files[0].id if file_objects else None
last_id = limited_files[-1].id if file_objects else None
return VectorStoreListFilesResponse(
data=limited_files,
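
Worked example of the pagination fix in the last hunk: with limit=2 and three matching files, limited_files = file_objects[:2], so taking last_id from file_objects would point the cursor at a file that was never returned and the next page would skip it. A runnable sketch under those assumptions (names mirror the hunk; the data is hypothetical):

from types import SimpleNamespace

file_objects = [SimpleNamespace(id=f"file-{i}") for i in range(3)]  # hypothetical page
limit = 2
limited_files = file_objects[:limit]

has_more = len(file_objects) > limit                        # True: a third file exists
first_id = limited_files[0].id if limited_files else None   # "file-0"
last_id = limited_files[-1].id if limited_files else None   # "file-1", the correct cursor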

View file

@@ -1,422 +0,0 @@
{
"test_id": "tests/integration/batches/test_batches.py::TestBatchesIntegration::test_batch_e2e_embeddings[emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Hello world",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.034477483,
0.030899182,
0.0066526434,
0.026075281,
-0.039411988,
-0.16037956,
0.06692074,
-0.006511468,
-0.047467157,
0.014774274,
0.07094562,
0.055527706,
0.019183245,
-0.026297163,
-0.010018651,
-0.02694715,
0.0223884,
-0.02220693,
-0.14977267,
-0.017530814,
0.0075938613,
0.054253556,
0.0032258728,
0.031724673,
-0.08466085,
-0.029342307,
0.05155048,
0.048105717,
-0.0032670307,
-0.05822795,
0.041971523,
0.022229431,
0.1281518,
-0.022270948,
-0.011725874,
0.06294936,
-0.032847952,
-0.09124354,
-0.031128692,
0.05274829,
0.047067728,
-0.08414196,
-0.029979317,
-0.020692566,
0.00949804,
-0.0035992558,
0.0074442336,
0.03928378,
0.09326073,
-0.0037437282,
-0.052663893,
-0.058101393,
-0.006925679,
0.0052269334,
0.08290669,
0.019312402,
0.0062818974,
-0.010331665,
0.008930684,
-0.037712026,
-0.045175705,
0.023950849,
-0.006926045,
0.013429504,
0.100098,
-0.0715888,
-0.021700105,
0.031693522,
-0.05161389,
-0.08224763,
-0.06577986,
-0.009853981,
0.005808086,
0.07364217,
-0.034008067,
0.024907362,
0.014441484,
0.02645124,
0.009659713,
0.030284341,
0.052878983,
-0.07536944,
0.009890014,
0.029907802,
0.017498897,
0.02313779,
0.0018918256,
0.0013156217,
-0.047173936,
-0.011251131,
-0.11422648,
-0.019960148,
0.040278148,
0.0022633963,
-0.07986738,
-0.025357265,
0.094500035,
-0.029062947,
-0.14495483,
0.2309815,
0.027703581,
0.03208736,
0.031073036,
0.042917974,
0.064246915,
0.032118786,
-0.004844535,
0.055775862,
-0.03756279,
-0.021487191,
-0.028432492,
-0.028887685,
0.03842892,
-0.017359573,
0.052465834,
-0.07493626,
-0.031175744,
0.021936033,
-0.039823197,
-0.008681939,
0.026978256,
-0.048551314,
0.011414809,
0.029628372,
-0.020587107,
0.013077965,
0.028824588,
-3.1978743e-33,
0.06475607,
-0.018065408,
0.05190019,
0.12193858,
0.028755108,
0.008794777,
-0.07044016,
-0.016856866,
0.040675826,
0.04222898,
0.025450956,
0.035772353,
-0.049134083,
0.0021395232,
-0.015527445,
0.05065655,
-0.04814189,
0.03586998,
-0.004134139,
0.10165314,
-0.055980552,
-0.010677752,
0.011231545,
0.09068785,
0.004311188,
0.035094332,
-0.009658399,
-0.09383056,
0.092755266,
0.00799794,
-0.0077075018,
-0.052119244,
-0.01259255,
0.0032277475,
0.005989667,
0.0075889886,
0.010571857,
-0.08629758,
-0.06985891,
-0.002511263,
-0.091053724,
0.0468712,
0.05203361,
0.0072902967,
0.010906411,
-0.0052922186,
0.013883815,
0.021929385,
0.0341257,
0.060227357,
0.00018942523,
0.0146624865,
-0.07000342,
0.028425341,
-0.027542787,
0.01082086,
0.03491755,
-0.022430921,
0.0096813915,
0.07725412,
0.021618832,
0.114911504,
-0.06805403,
0.023872944,
-0.015999107,
-0.017794114,
0.06442477,
0.03206309,
0.050293576,
-0.005988605,
-0.03376946,
0.017821673,
0.016567992,
0.063335925,
0.034753703,
0.046586752,
0.09789875,
-0.006560692,
0.025039855,
-0.07780643,
0.016878096,
-0.0010056288,
0.02257608,
-0.0382721,
0.09572481,
-0.005296001,
0.010567662,
-0.11538674,
-0.013233586,
-0.010786205,
-0.083147496,
0.073254965,
0.049377624,
-0.009025328,
-0.0957893,
3.3687185e-33,
0.12494067,
0.019226579,
-0.058172084,
-0.035952393,
-0.050862074,
-0.045700952,
-0.0826631,
0.14819908,
-0.088347495,
0.060315337,
0.05109269,
0.010308115,
0.1411753,
0.030833788,
0.06101746,
-0.052806143,
0.13661332,
0.00917483,
-0.017295862,
-0.0128495265,
-0.007851698,
-0.051084496,
-0.05235087,
0.0076632234,
-0.015217299,
0.017015414,
0.021324545,
0.020506723,
-0.12004153,
0.014523494,
0.026743378,
0.025221687,
-0.04270567,
0.00676352,
-0.014453511,
0.045142446,
-0.091383636,
-0.019459482,
-0.017806036,
-0.055010412,
-0.05270923,
-0.010370778,
-0.052053526,
0.020918628,
-0.080037735,
-0.012147244,
-0.057777684,
0.023249507,
-0.007838778,
-0.025807643,
-0.07987164,
-0.020683115,
0.04888083,
-0.020459235,
-0.049192864,
0.01407799,
-0.063744746,
-0.0077936463,
0.016429903,
-0.025707569,
0.013326097,
0.026210392,
0.009855086,
0.06317218,
0.0026150644,
-0.0065879063,
0.0166049,
0.032400407,
0.038005095,
-0.036269873,
-0.0069020875,
0.00019545198,
-0.0017537851,
-0.027427403,
-0.02801922,
0.049696837,
-0.028842367,
-0.0023814398,
0.01481421,
0.00976869,
0.0057697925,
0.01341087,
0.00551593,
0.037237898,
0.007291808,
0.040068958,
0.08141818,
0.07197348,
-0.013163506,
-0.042782705,
-0.010938265,
0.0049547236,
-0.00923014,
0.035068717,
-0.051007,
-1.5708556e-08,
-0.088558294,
0.02391312,
-0.016132735,
0.03169382,
0.027184812,
0.052484553,
-0.047118798,
-0.058789898,
-0.063239954,
0.040775288,
0.049807984,
0.106462926,
-0.07448737,
-0.012401869,
0.018361589,
0.039486438,
-0.024830224,
0.014500051,
-0.03712332,
0.020043189,
8.399218e-05,
0.009852795,
0.024823224,
-0.05252818,
0.02932855,
-0.0871494,
-0.01447227,
0.025996566,
-0.018731978,
-0.07618361,
0.03505914,
0.10363578,
-0.0280213,
0.012769872,
-0.076482065,
-0.018743375,
0.024961015,
0.08152011,
0.06866303,
-0.06411612,
-0.08387694,
0.061479986,
-0.03345598,
-0.10615398,
-0.040166635,
0.032536518,
0.076652974,
-0.07297006,
0.00039833272,
-0.0409393,
-0.07580284,
0.027465926,
0.07468789,
0.017779494,
0.09106629,
0.11033428,
0.00065298256,
0.051472265,
-0.01461242,
0.033237122,
0.023671487,
-0.022980422,
0.038988944,
0.030206418
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 2,
"total_tokens": 2
}
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,13 @@
{
"default": [
{"suite": "base", "setup": "ollama"},
{"suite": "vision", "setup": "ollama-vision"},
{"suite": "responses", "setup": "gpt"},
{"suite": "base-vllm-subset", "setup": "vllm"}
],
"schedules": {
"1 0 * * 0": [
{"suite": "base", "setup": "vllm"}
]
}
}
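
This file drives matrix selection: the default list of (suite, setup) pairs applies when no schedule override matches, while a cron string under schedules swaps in an override (here, the vllm setup against the base suite on the weekly cron). A minimal sketch of how a matrix generator might consume it, assuming the file is plain JSON; the file path and CLI shape are illustrative, and the real script may differ:

import json
import sys

def select_matrix(config: dict, schedule: str | None) -> list[dict]:
    # A matching cron override wins; otherwise fall back to the default pairs.
    if schedule and schedule in config.get("schedules", {}):
        return config["schedules"][schedule]
    return config["default"]

if __name__ == "__main__":
    with open("tests/integration/ci_matrix.json") as f:  # path is an assumption
        config = json.load(f)
    schedule = sys.argv[1] if len(sys.argv) > 1 else None
    # Emit a single JSON line for the workflow to consume as its matrix.
    print(json.dumps({"config": select_matrix(config, schedule)}))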

View file

@@ -0,0 +1,89 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b-instruct-fp16",
"created": 1760453641,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "qwen3:4b",
"created": 1757615302,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-oss:latest",
"created": 1756395223,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "nomic-embed-text:latest",
"created": 1756318548,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b",
"created": 1755191039,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:l6-v2",
"created": 1753968177,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:1b",
"created": 1746124735,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:latest",
"created": 1746044170,
"object": "model",
"owned_by": "library"
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374291,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-f70298e4ea3e4b4eb7f2cc2deb7a2b01",
"object": "model_permission",
"created": 1762374291,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:06.037921-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,422 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Test user parameter",
"encoding_format": "float",
"user": "test-user-123"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.043779343,
0.021533398,
-0.081306435,
0.010584965,
-0.079082854,
-0.03219143,
0.13092613,
0.04234389,
-0.11600539,
-0.07588513,
0.04182356,
-0.08061255,
0.038127176,
-0.010701234,
0.015768763,
-0.04193689,
0.04310592,
-0.033361685,
0.013566423,
-0.010392366,
0.015551022,
-0.037858423,
-0.050305344,
-0.025666261,
-0.047879875,
-0.087179765,
0.016856788,
-0.036765736,
0.006393739,
0.020844297,
0.11262393,
-0.002143682,
-0.07910913,
0.038748607,
0.11532516,
-0.019759571,
0.0066967797,
-0.021164352,
-0.014471563,
-0.0027048697,
-0.034388524,
-0.052571636,
-0.030607725,
0.04747725,
-0.02431059,
0.0109337615,
-0.03946421,
0.071846664,
-0.020690937,
0.01898796,
0.042931512,
-0.0077551426,
0.0025911122,
-0.058268107,
0.0117475465,
-0.022701943,
0.0017815019,
-0.012612941,
0.030724185,
0.017728312,
-0.06155491,
-0.03656162,
0.02583153,
0.02537894,
0.012139213,
0.009105951,
-0.027318193,
-0.093389414,
0.005184693,
0.007488449,
-0.07540277,
0.010159999,
-0.028444426,
0.030260745,
0.0036438918,
-0.022627153,
-0.037846327,
-0.08381657,
-0.012445195,
-0.048908208,
0.029149827,
-0.044437535,
-0.07520237,
-0.020924438,
0.06342514,
0.1629199,
0.060563333,
-0.012817673,
-0.031030292,
0.018368995,
0.11223112,
0.07292473,
-0.062686674,
-0.031803295,
-0.017489262,
0.048433464,
-0.041148387,
-0.04183779,
-0.05994369,
0.15909556,
-0.027785666,
-0.012455991,
0.056005318,
-0.019891974,
0.022063067,
0.006342065,
0.0464118,
-0.07311654,
0.033282198,
0.05949105,
-0.033307947,
0.030738499,
0.008186239,
-0.020268966,
0.056593496,
-0.081526734,
0.023390312,
0.0060836566,
-0.07992586,
0.013986445,
0.052250065,
0.027186505,
-0.049284942,
0.028148174,
0.019493744,
0.05418436,
0.0827222,
-1.8825437e-33,
0.01360945,
-0.010870715,
0.015887791,
0.069373555,
-0.051129147,
0.08999179,
0.044494778,
0.08100757,
0.018944906,
-0.020974122,
-0.017938385,
-0.021756735,
0.010972489,
0.015099965,
0.017018452,
0.094338946,
0.0034407445,
0.010244923,
-0.044709302,
0.0018059182,
0.015817573,
-0.065777056,
-0.004948138,
0.0044092103,
-0.019589791,
-0.092789896,
-0.025898295,
0.044104066,
0.0541385,
-0.007362511,
-0.021487307,
-0.036836285,
-0.09148704,
0.084001675,
-0.018094191,
0.003797567,
0.020257449,
0.04394643,
-0.0772898,
0.0057312953,
-0.054519102,
-0.024835315,
0.0753162,
0.034552757,
-0.081203006,
-0.12210961,
-0.0053012627,
0.00780717,
0.050265096,
0.015569535,
-0.056362487,
0.039800324,
0.013022089,
-0.04015537,
0.014401654,
-0.033209093,
-0.008451782,
-0.037590392,
-0.01965779,
0.01730637,
-0.00896531,
-0.0018413392,
-0.0030382746,
0.030460354,
-0.05112036,
-0.086875,
-0.018338922,
-0.11328767,
0.07325826,
0.046035297,
0.012633494,
-0.06343216,
-0.028439038,
0.020128354,
-0.07883383,
-0.00069870794,
-0.03155447,
0.12306934,
0.004300722,
-0.026421167,
0.078361824,
-0.077461444,
-0.021267027,
0.048929654,
0.02919381,
-0.0092880055,
-0.030666346,
-0.04102384,
-0.03860138,
-0.08042292,
0.023227168,
0.04191858,
-0.058156747,
0.0585743,
0.076342255,
4.465569e-34,
-0.019599343,
0.040230304,
0.01455632,
0.034345042,
0.04392999,
-0.023241352,
0.067749046,
-0.03010354,
-0.09075954,
-0.019227842,
-0.027724287,
-0.00062344945,
0.0042892746,
0.053643614,
0.04075099,
0.032581333,
-0.107116826,
-0.0500636,
-0.016655827,
-0.007782394,
-0.111523,
0.07476429,
-0.016019335,
-0.050536986,
-0.11320647,
-0.0061384854,
0.050886273,
-0.030283457,
0.04318923,
0.03301474,
0.02362771,
0.046507858,
-0.03416386,
0.036145207,
0.023037339,
-0.026803765,
0.06361122,
0.09975251,
0.035269737,
0.1554014,
0.083479255,
0.10931981,
0.046847064,
-0.010136355,
-0.032541983,
0.12926093,
0.031193413,
-0.09971323,
0.010830718,
0.02325219,
-0.011917061,
0.010155018,
0.06883269,
0.009340846,
-0.022698723,
-0.042815465,
-0.048211087,
-0.085067384,
0.05105234,
0.045155898,
-0.03564869,
0.06549556,
0.048875004,
0.037915554,
-0.14071068,
-0.067095764,
0.009898252,
-0.0049653547,
-0.044304688,
0.0039006064,
-0.026903173,
-0.066124685,
0.040738244,
-0.052228633,
0.060485654,
-0.041119356,
-0.04312945,
-0.025152665,
0.08556276,
-0.044942576,
0.06393979,
-0.024227533,
-0.05052092,
-0.0020624825,
-0.078943975,
0.0026753,
0.02068896,
0.102683865,
-0.01237572,
0.056172684,
0.06552171,
0.030940128,
-0.07721113,
-0.061241012,
-0.016143149,
-1.3511957e-08,
-0.050416306,
-0.033628013,
0.046722032,
0.04744138,
-0.04411888,
0.04631675,
-0.0060847937,
-0.053873356,
0.013075445,
0.050437532,
-0.009895477,
-0.0041795173,
0.07229928,
0.021081135,
0.02672776,
-0.07482113,
-0.026757998,
0.052755926,
-0.034690056,
0.039811596,
-0.016370349,
0.045900222,
-0.02250936,
0.023861,
0.04912799,
0.09111738,
-0.0024878879,
0.049395334,
-0.03861115,
0.020867983,
0.076049894,
0.084881924,
-0.051956687,
-0.06878504,
-0.061384037,
0.077220954,
-0.06454818,
0.044513144,
0.008181126,
0.015890416,
-0.04280811,
0.005317184,
0.0034429359,
0.0031937633,
-0.013058055,
-0.09134677,
0.06425565,
-0.054977305,
0.0007087448,
-0.06258866,
-0.034974415,
-0.029966963,
0.044276785,
0.017868131,
-0.027976807,
-0.036579583,
0.021142753,
0.06057356,
-0.03133335,
-0.014331035,
0.034653842,
0.052315667,
-0.036585484,
0.028209662
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 3,
"total_tokens": 3
}
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:05.489695-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:10.993052-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,54 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Test dimensions parameter",
"encoding_format": "float",
"dimensions": 16
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.25369987,
0.016355688,
-0.29676768,
0.316427,
-0.18642858,
0.076206245,
-0.031503417,
0.29860005,
-0.496603,
-0.36621967,
0.25334543,
-0.333392,
0.005993569,
0.14079759,
-0.13775977,
-0.14680246
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 3,
"total_tokens": 3
}
}
},
"is_streaming": false
}
}

View file

@@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "This is the first text",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.0010839553,
0.067364,
0.015185306,
0.037240896,
0.029337138,
0.015160007,
0.0743005,
-0.0032980628,
0.06581814,
-0.021851996,
0.034412965,
0.051005766,
-0.011422501,
-0.025062356,
-0.051756065,
0.027193472,
0.07849549,
-0.05999108,
0.010471458,
-0.003400683,
0.043449093,
0.122919865,
9.668583e-05,
0.002153268,
0.018064681,
0.045069378,
-0.09762388,
0.11186886,
0.049657565,
-0.03485217,
-0.039568134,
0.003532146,
0.15894793,
0.06341193,
0.047953114,
0.011617699,
0.009799243,
0.015377702,
0.009379663,
0.079989135,
0.019207356,
-0.13718612,
0.01730099,
0.013687199,
0.014266827,
-0.00022628276,
-0.017710257,
0.02291068,
0.03590651,
-0.015361055,
-0.00978436,
-0.0401825,
-0.011481894,
0.00014050963,
0.08540761,
0.08730027,
0.0046967245,
0.006164595,
0.003031956,
0.008891807,
-0.006260525,
0.05061661,
0.0005252785,
0.0467754,
0.09363822,
-0.012814104,
0.017708639,
-0.062698044,
-0.11535818,
0.041123625,
-0.014939021,
0.044815876,
-0.020868087,
0.042999975,
-0.061038766,
0.019998673,
-0.068740115,
-0.035516046,
0.041884515,
0.012185281,
-0.029084096,
-0.06643917,
0.030638866,
0.05149607,
-0.12815061,
0.06821646,
-0.047070153,
-0.032925386,
0.007499353,
-0.017841771,
0.038296465,
-0.015792726,
0.07054022,
0.038072467,
-0.11428876,
0.04210153,
-0.11162366,
-0.045723915,
-0.028951947,
0.12735675,
-0.013946637,
-0.027157523,
0.07295939,
0.024098422,
-0.054050542,
-0.13125896,
0.03013205,
-0.023223283,
-0.019072957,
-0.007864101,
-0.021954412,
-0.05329901,
-0.07088355,
-0.0115214065,
-0.023399564,
-0.015638318,
0.05148062,
0.029261008,
0.06481798,
0.064031154,
0.014445124,
-0.058017716,
-0.069921836,
-0.023950975,
-0.08490842,
-0.08779567,
0.048162255,
-6.1240354e-33,
0.010315817,
0.038685724,
0.0031864564,
0.0357421,
0.0050265454,
-0.004210234,
-0.053900674,
-0.02988569,
-0.07548199,
-0.078777455,
-0.012271205,
-0.05056629,
0.020729113,
-0.051866043,
-0.059254467,
-0.059903424,
-0.055699438,
0.032196835,
-0.006328442,
-0.021668624,
-0.059921067,
0.0519611,
0.051227964,
-0.063502096,
-0.04873505,
-0.014265467,
0.0025537873,
-0.024346355,
-0.0055181426,
0.02007461,
-0.10196586,
0.010727814,
-0.023194604,
-0.081025146,
-0.014997581,
0.0017926424,
0.045078833,
-0.052792255,
-0.05368693,
-0.013245513,
-0.019808132,
0.020031843,
-0.00081401254,
-0.10117647,
-0.0007066768,
0.09663035,
-0.03946875,
0.04954661,
0.042237334,
0.007943922,
-0.05234212,
0.051887065,
0.03711589,
0.034850314,
0.063441575,
-0.026583876,
-0.009227281,
-0.0025737104,
-0.056082893,
0.0020716325,
-0.020129146,
0.0012315192,
-0.0017609745,
0.019111704,
0.016572498,
-0.011374,
0.010381644,
-0.007864189,
0.04664868,
-0.046856377,
-0.08523834,
-0.008974813,
0.012022968,
0.013285977,
0.015182303,
0.03708482,
0.026587088,
0.014473839,
-0.013946565,
0.01999883,
-0.06888259,
-0.07111367,
0.012369427,
0.032828625,
-0.03152666,
0.045777358,
0.06801705,
-0.07747748,
0.018461134,
0.06620267,
-0.086365156,
0.008950603,
0.041320425,
0.009541193,
0.0066037327,
4.71081e-33,
-0.026172558,
0.0013145636,
-0.014140948,
-0.024360213,
0.06931815,
0.031448748,
0.037257418,
0.06468137,
0.049403396,
0.11072201,
0.04985356,
0.06679111,
0.04153249,
-0.034106053,
0.070283465,
0.034855895,
0.12902643,
-0.021033453,
0.008940618,
0.030177405,
-0.022881329,
0.036504544,
-0.13194299,
0.045612644,
-0.0127895875,
0.04174139,
0.1232064,
-0.013484046,
-0.007285246,
-0.029776007,
0.025007037,
-0.009516822,
0.02475585,
0.023208592,
-0.019141924,
0.02259424,
0.013740329,
-0.038490705,
-0.014461541,
0.075218394,
0.13589163,
0.009839605,
-0.037563317,
-0.02737327,
-0.016485116,
-0.048845276,
-0.03523722,
-0.05439929,
-0.0017957076,
0.03563579,
-0.010255764,
-0.01859244,
-0.03647324,
-0.055985246,
-0.007833892,
0.009086756,
-0.007333394,
0.050386623,
-0.0002305643,
-0.03637248,
-0.024937423,
0.058877032,
-0.07250415,
0.07401245,
0.053917013,
-0.051895224,
-0.006332244,
0.07850189,
-0.01695057,
-0.006673017,
0.012659739,
-0.014127065,
-0.13639799,
-0.08524976,
-0.017533274,
-0.0046930755,
0.013687301,
0.0009185522,
-0.0719948,
-0.06887779,
0.14208324,
0.03187123,
-0.055919908,
0.030401653,
0.061900012,
0.029921472,
-0.00096237566,
-0.065010294,
-0.020657646,
0.039562404,
-0.123846576,
0.0028867351,
0.051196404,
0.13397509,
-0.088453874,
-1.7590333e-08,
-0.025786474,
-0.080303885,
-0.09164947,
0.031999,
0.00584884,
0.11464121,
0.023377793,
-0.06902527,
-0.055941124,
-0.05787791,
0.014640494,
0.080320895,
0.0037027278,
-0.030824674,
0.024432683,
0.008549355,
-0.05291309,
-0.06636625,
0.0007468212,
-0.02379191,
0.030766092,
0.054053318,
-0.0027251292,
-0.09928475,
-0.0150488615,
0.016240431,
-0.0015727071,
0.01190173,
0.007895162,
0.04894733,
0.00487708,
0.08263861,
-0.014527478,
-0.043879665,
0.004633697,
0.024611989,
0.023827499,
0.02366802,
0.050754935,
-0.051841788,
0.0212632,
-0.0034418616,
-0.021175656,
0.020591663,
-0.06475325,
0.0542002,
0.027792262,
-0.05295982,
0.01509645,
-0.11977527,
-0.03416359,
-0.012206606,
0.047451705,
0.020876253,
-0.026368074,
0.01502373,
0.033982284,
0.059788153,
-0.052526973,
0.03356499,
0.061180886,
0.096336305,
0.116353564,
-0.016122948
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5
}
}
},
"is_streaming": false
}
}

View file

@@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Test encoding format",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.019109152,
-0.0205217,
-0.071471564,
-0.023057504,
-0.06572786,
-0.0057331678,
-0.029395059,
-0.031822033,
-0.015748156,
-0.039123703,
0.02694331,
-0.0641754,
0.013510709,
0.050364953,
-0.03114308,
-0.08322274,
-0.03192984,
0.074970365,
-0.016377378,
-0.0013804765,
0.03850419,
-0.03441017,
-0.0048610102,
-0.03094053,
0.051915165,
0.009193639,
0.0071807485,
0.066353165,
0.024559105,
-0.04767663,
0.0376255,
-0.042586852,
0.078906916,
0.04827334,
0.13389648,
0.013978803,
0.03242126,
-0.08890431,
-0.014188366,
0.03553346,
-0.02476171,
-0.028628638,
0.047652308,
0.026259335,
0.048472118,
0.06663718,
-0.013584004,
0.071824096,
-0.073066786,
-0.050326068,
0.0039502876,
0.03300394,
-0.047816053,
-0.017657546,
0.010284664,
-0.10525716,
-0.010034394,
0.014627846,
-0.053289402,
0.060343288,
-0.10079798,
0.011359217,
-0.007258805,
0.05346498,
-0.0068726647,
0.03697505,
0.024016414,
0.023924585,
-0.011357761,
-0.119573325,
-0.115692526,
-0.06673285,
-0.04233929,
0.09302018,
0.02486003,
0.084047645,
0.0030104683,
-0.06605523,
0.027435688,
-0.032412402,
-0.025584543,
-0.06590182,
0.067799605,
0.0976311,
0.07360619,
0.034108408,
0.056534845,
0.076705806,
-0.05179011,
0.053681813,
0.0054462817,
0.015972052,
0.0035656213,
0.06333522,
-0.01597322,
0.05295729,
0.11539089,
0.055200845,
0.037667733,
0.08083974,
0.035557732,
-0.07982552,
-0.012100598,
-0.07612801,
-0.0695667,
-0.017815348,
0.16996554,
-0.0048157335,
0.09073964,
-0.07196438,
0.020009195,
-0.05956153,
-0.06312686,
-0.07716358,
0.0150949685,
-0.050339524,
-0.05444592,
-0.023078114,
-0.035431463,
-0.030625492,
-0.053284056,
-0.06745872,
-0.08049862,
0.002800386,
-0.0114065055,
-0.029938627,
0.024243163,
-1.5107368e-33,
-0.02984805,
-0.00033025863,
0.0030491,
0.023082128,
-0.04808977,
-0.0027841914,
-0.037461873,
0.016201235,
-0.02998979,
0.015712254,
0.009664366,
-0.03984875,
-0.029493092,
0.03837007,
-0.005226541,
0.06857773,
-0.007891026,
-0.0019036188,
-0.035219382,
0.03627955,
0.05867878,
0.023777487,
0.044425115,
-0.025999734,
-0.025318418,
-0.02685328,
-0.02368557,
-0.094386704,
0.0016880591,
0.0065193563,
-0.09711005,
-0.053493332,
-0.08241291,
0.023502836,
-0.02407441,
0.015992055,
0.0050546136,
0.030476829,
-0.088438906,
0.11427086,
0.028378993,
0.02985018,
0.022821706,
0.018776013,
0.056330692,
-0.020254886,
-0.00070521404,
-0.0864014,
0.020228866,
-0.0039839754,
0.0010032665,
0.065425254,
-0.036518592,
0.032341316,
0.023112345,
0.044507477,
0.09644409,
-0.07272818,
0.03370691,
0.042783204,
-0.052776046,
0.0003352446,
0.061005518,
-0.019623613,
-0.023274273,
-0.11602989,
0.007926991,
-0.12529127,
0.017030548,
0.013484081,
-0.030528491,
-0.024298145,
0.006284904,
-0.015568167,
-0.072781205,
0.012985074,
0.015977127,
0.0051657534,
-0.0026022948,
-0.059578825,
0.06372584,
-0.0019363016,
0.018695941,
-0.009242735,
-0.05887247,
-0.032524884,
-0.009591115,
-0.047377545,
0.020585002,
-0.007134836,
0.050135154,
0.016087264,
-0.0058878902,
-0.07661024,
0.0820671,
1.6053074e-33,
-0.0056476775,
0.06719423,
-0.011510322,
0.05586423,
-0.08886697,
-0.036528286,
0.12134926,
0.028969096,
0.022419011,
0.047327086,
0.07621525,
-0.07937209,
0.0020504447,
-0.023489932,
-0.029759271,
-0.04879825,
-0.034876924,
0.06461666,
0.051493492,
0.008284975,
-0.031793926,
0.098015875,
0.008122038,
0.01032072,
0.059404474,
0.05176487,
0.042960417,
0.0069373515,
0.027306866,
0.039226852,
0.062416088,
0.051797673,
0.0053232666,
0.05965781,
-0.008935817,
-0.0135501,
0.08726531,
0.028408607,
-0.006820522,
0.052098107,
0.049510423,
0.055176627,
-0.016774576,
0.077848226,
0.026121203,
0.031311177,
0.011812256,
-0.0341528,
0.052825138,
0.003484205,
0.09811821,
0.029693138,
-0.031354938,
-0.012068096,
0.018686052,
-0.032609653,
-0.09638639,
0.033928476,
-0.07897009,
-0.008300913,
-0.04915284,
0.02006342,
0.061743837,
-0.018412542,
-0.033583082,
-0.090903476,
0.021116566,
-0.022445552,
-0.011814237,
-0.048816226,
0.048287436,
-0.07294675,
-0.02198573,
0.062477604,
0.023308119,
-0.052141402,
-0.05409648,
0.062339973,
0.052301563,
0.051384836,
-0.02426406,
-0.018824687,
-0.01660311,
0.09330242,
0.008502433,
0.063408315,
0.019377569,
0.047027417,
-0.0058769877,
-0.0034505578,
0.07956527,
0.10210641,
0.015302805,
0.04089992,
0.038895626,
-1.2710905e-08,
-0.019304764,
-0.1217849,
-0.047983564,
-0.053382736,
-0.113197215,
0.05181196,
-0.10498226,
-0.08524135,
0.0061870585,
-0.029899841,
0.064561576,
-0.028730206,
-0.064735174,
-0.024887148,
0.0026119591,
-0.008796896,
0.030246036,
0.009807871,
0.0044631795,
0.0851423,
-0.026132204,
0.11360852,
-0.0045760865,
-0.036643907,
-0.09078616,
0.081466354,
0.012066122,
0.07288108,
0.004079195,
-0.05064171,
0.068772145,
0.029108258,
0.014786602,
-0.11868081,
-0.05042858,
0.05376578,
0.04570744,
0.074074544,
0.028540619,
0.03937392,
0.0291862,
-0.035710927,
-0.09132387,
-0.047720414,
-0.00082342024,
-0.073688805,
0.011024812,
0.015703982,
-0.03590976,
-0.08121826,
0.020365681,
-0.045287356,
-0.024955628,
0.001167751,
0.00037544646,
-0.026392939,
-0.032434102,
0.003407464,
-0.007060387,
0.024250468,
0.076347135,
0.039537415,
0.036043648,
-0.07085338
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 3,
"total_tokens": 3
}
}
},
"is_streaming": false
}
}

View file

@@ -1,422 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Test user parameter",
"encoding_format": "base64",
"user": "test-user-123"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.043779343,
0.021533398,
-0.081306435,
0.010584965,
-0.079082854,
-0.03219143,
0.13092613,
0.04234389,
-0.11600539,
-0.07588513,
0.04182356,
-0.08061255,
0.038127176,
-0.010701234,
0.015768763,
-0.04193689,
0.04310592,
-0.033361685,
0.013566423,
-0.010392366,
0.015551022,
-0.037858423,
-0.050305344,
-0.025666261,
-0.047879875,
-0.087179765,
0.016856788,
-0.036765736,
0.006393739,
0.020844297,
0.11262393,
-0.002143682,
-0.07910913,
0.038748607,
0.11532516,
-0.019759571,
0.0066967797,
-0.021164352,
-0.014471563,
-0.0027048697,
-0.034388524,
-0.052571636,
-0.030607725,
0.04747725,
-0.02431059,
0.0109337615,
-0.03946421,
0.071846664,
-0.020690937,
0.01898796,
0.042931512,
-0.0077551426,
0.0025911122,
-0.058268107,
0.0117475465,
-0.022701943,
0.0017815019,
-0.012612941,
0.030724185,
0.017728312,
-0.06155491,
-0.03656162,
0.02583153,
0.02537894,
0.012139213,
0.009105951,
-0.027318193,
-0.093389414,
0.005184693,
0.007488449,
-0.07540277,
0.010159999,
-0.028444426,
0.030260745,
0.0036438918,
-0.022627153,
-0.037846327,
-0.08381657,
-0.012445195,
-0.048908208,
0.029149827,
-0.044437535,
-0.07520237,
-0.020924438,
0.06342514,
0.1629199,
0.060563333,
-0.012817673,
-0.031030292,
0.018368995,
0.11223112,
0.07292473,
-0.062686674,
-0.031803295,
-0.017489262,
0.048433464,
-0.041148387,
-0.04183779,
-0.05994369,
0.15909556,
-0.027785666,
-0.012455991,
0.056005318,
-0.019891974,
0.022063067,
0.006342065,
0.0464118,
-0.07311654,
0.033282198,
0.05949105,
-0.033307947,
0.030738499,
0.008186239,
-0.020268966,
0.056593496,
-0.081526734,
0.023390312,
0.0060836566,
-0.07992586,
0.013986445,
0.052250065,
0.027186505,
-0.049284942,
0.028148174,
0.019493744,
0.05418436,
0.0827222,
-1.8825437e-33,
0.01360945,
-0.010870715,
0.015887791,
0.069373555,
-0.051129147,
0.08999179,
0.044494778,
0.08100757,
0.018944906,
-0.020974122,
-0.017938385,
-0.021756735,
0.010972489,
0.015099965,
0.017018452,
0.094338946,
0.0034407445,
0.010244923,
-0.044709302,
0.0018059182,
0.015817573,
-0.065777056,
-0.004948138,
0.0044092103,
-0.019589791,
-0.092789896,
-0.025898295,
0.044104066,
0.0541385,
-0.007362511,
-0.021487307,
-0.036836285,
-0.09148704,
0.084001675,
-0.018094191,
0.003797567,
0.020257449,
0.04394643,
-0.0772898,
0.0057312953,
-0.054519102,
-0.024835315,
0.0753162,
0.034552757,
-0.081203006,
-0.12210961,
-0.0053012627,
0.00780717,
0.050265096,
0.015569535,
-0.056362487,
0.039800324,
0.013022089,
-0.04015537,
0.014401654,
-0.033209093,
-0.008451782,
-0.037590392,
-0.01965779,
0.01730637,
-0.00896531,
-0.0018413392,
-0.0030382746,
0.030460354,
-0.05112036,
-0.086875,
-0.018338922,
-0.11328767,
0.07325826,
0.046035297,
0.012633494,
-0.06343216,
-0.028439038,
0.020128354,
-0.07883383,
-0.00069870794,
-0.03155447,
0.12306934,
0.004300722,
-0.026421167,
0.078361824,
-0.077461444,
-0.021267027,
0.048929654,
0.02919381,
-0.0092880055,
-0.030666346,
-0.04102384,
-0.03860138,
-0.08042292,
0.023227168,
0.04191858,
-0.058156747,
0.0585743,
0.076342255,
4.465569e-34,
-0.019599343,
0.040230304,
0.01455632,
0.034345042,
0.04392999,
-0.023241352,
0.067749046,
-0.03010354,
-0.09075954,
-0.019227842,
-0.027724287,
-0.00062344945,
0.0042892746,
0.053643614,
0.04075099,
0.032581333,
-0.107116826,
-0.0500636,
-0.016655827,
-0.007782394,
-0.111523,
0.07476429,
-0.016019335,
-0.050536986,
-0.11320647,
-0.0061384854,
0.050886273,
-0.030283457,
0.04318923,
0.03301474,
0.02362771,
0.046507858,
-0.03416386,
0.036145207,
0.023037339,
-0.026803765,
0.06361122,
0.09975251,
0.035269737,
0.1554014,
0.083479255,
0.10931981,
0.046847064,
-0.010136355,
-0.032541983,
0.12926093,
0.031193413,
-0.09971323,
0.010830718,
0.02325219,
-0.011917061,
0.010155018,
0.06883269,
0.009340846,
-0.022698723,
-0.042815465,
-0.048211087,
-0.085067384,
0.05105234,
0.045155898,
-0.03564869,
0.06549556,
0.048875004,
0.037915554,
-0.14071068,
-0.067095764,
0.009898252,
-0.0049653547,
-0.044304688,
0.0039006064,
-0.026903173,
-0.066124685,
0.040738244,
-0.052228633,
0.060485654,
-0.041119356,
-0.04312945,
-0.025152665,
0.08556276,
-0.044942576,
0.06393979,
-0.024227533,
-0.05052092,
-0.0020624825,
-0.078943975,
0.0026753,
0.02068896,
0.102683865,
-0.01237572,
0.056172684,
0.06552171,
0.030940128,
-0.07721113,
-0.061241012,
-0.016143149,
-1.3511957e-08,
-0.050416306,
-0.033628013,
0.046722032,
0.04744138,
-0.04411888,
0.04631675,
-0.0060847937,
-0.053873356,
0.013075445,
0.050437532,
-0.009895477,
-0.0041795173,
0.07229928,
0.021081135,
0.02672776,
-0.07482113,
-0.026757998,
0.052755926,
-0.034690056,
0.039811596,
-0.016370349,
0.045900222,
-0.02250936,
0.023861,
0.04912799,
0.09111738,
-0.0024878879,
0.049395334,
-0.03861115,
0.020867983,
0.076049894,
0.084881924,
-0.051956687,
-0.06878504,
-0.061384037,
0.077220954,
-0.06454818,
0.044513144,
0.008181126,
0.015890416,
-0.04280811,
0.005317184,
0.0034429359,
0.0031937633,
-0.013058055,
-0.09134677,
0.06425565,
-0.054977305,
0.0007087448,
-0.06258866,
-0.034974415,
-0.029966963,
0.044276785,
0.017868131,
-0.027976807,
-0.036579583,
0.021142753,
0.06057356,
-0.03133335,
-0.014331035,
0.034653842,
0.052315667,
-0.036585484,
0.028209662
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 3,
"total_tokens": 3
}
}
},
"is_streaming": false
}
}
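
The recording above captures an embeddings request exercising the optional user parameter with base64 encoding. For reference, a minimal client-side sketch that would produce an equivalent request; the model, input, and field values mirror the recorded body, while the client setup (base URL, placeholder API key) is an assumption:

from openai import OpenAI

# Sketch only: base_url and api_key are placeholders; model, input,
# encoding_format, and user reuse the recorded values verbatim.
client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="unused")

resp = client.embeddings.create(
    model="all-minilm:l6-v2",
    input="Test user parameter",
    encoding_format="base64",
    user="test-user-123",  # attribution field, passed through to the backend
)
print(resp.usage.prompt_tokens)  # 3 in the recording above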

View file

@@ -0,0 +1,84 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestEdgeCases::test_tool_without_schema[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Call the no args tool"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "no_args_tool",
"description": "Tool with no arguments",
"parameters": {
"type": "object",
"properties": {}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-452805c3c859",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user wants me to call the no args tool. Let me check the available functions. There's only one tool provided, which is the no_args_tool with no arguments. Since the user didn't specify any parameters, I should just return the tool call as instructed. I need to make sure the JSON is correctly formatted and within the XML tags. Alright, that's all I need.\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-7a67269afe214c85924c5171612bbdbd",
"function": {
"arguments": "{}",
"name": "no_args_tool"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 101,
"prompt_tokens": 136,
"total_tokens": 237,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
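
The recording above covers the edge case of a tool whose JSON schema declares no properties. A hedged sketch of the same call against a vLLM-served Qwen3 model; the request body mirrors the recording, the client setup is assumed:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="none")  # assumed setup

resp = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[{"role": "user", "content": "Call the no args tool"}],
    max_tokens=4096,
    tools=[{
        "type": "function",
        "function": {
            "name": "no_args_tool",
            "description": "Tool with no arguments",
            "parameters": {"type": "object", "properties": {}},
        },
    }],
)

call = resp.choices[0].message.tool_calls[0]
# In the recording the model emits the call with empty arguments: "{}"
print(call.function.name, call.function.arguments)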

View file

@@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "This is completely different content",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.050927628,
0.038399037,
-0.05559374,
-0.105984606,
0.06944504,
-0.08054001,
-0.025946686,
-0.045175657,
0.068730615,
0.016510814,
-0.0011700827,
0.023414683,
-0.0034143464,
0.06804153,
-0.021997927,
-0.014162646,
0.12356902,
-0.06536738,
-0.082627006,
0.04300477,
-0.039514318,
0.055434275,
-0.008866895,
0.020934915,
0.016280092,
0.09630312,
-0.022835929,
0.09175565,
0.06409549,
-0.06226981,
0.010888244,
0.07833004,
0.08844764,
-0.008459277,
-0.07542651,
0.04800223,
0.0042286967,
0.037884884,
0.0023502677,
0.032233667,
0.0047689923,
-0.070404515,
-0.06513966,
0.061046362,
0.021522248,
0.10113185,
-0.07537441,
-0.04074795,
-0.0055522234,
-0.0037093374,
-0.021283673,
-0.018193243,
-0.03323253,
-0.015658593,
0.0032862085,
0.037399907,
-0.021028537,
0.052572608,
0.10211333,
-0.018634265,
0.03612266,
0.08958185,
0.050681055,
0.019839589,
0.10220134,
-0.059074707,
-0.045562137,
-0.024107283,
-0.059917513,
-0.09795064,
-0.002078402,
0.032211803,
0.04863422,
0.08062527,
0.022614514,
0.0005379622,
-0.0015465368,
0.010018953,
-0.089729026,
0.023838207,
-0.015227461,
-0.020540234,
0.08525423,
-0.08025672,
-0.002200058,
0.0649954,
-0.023069935,
-0.06201302,
-0.06545048,
-0.029986514,
0.0045501734,
0.09718718,
0.09153336,
-0.0059684636,
-0.048185453,
-0.011855243,
-0.03170323,
-0.010363732,
0.029717747,
0.103405535,
-0.029072085,
0.005597891,
-0.03075466,
-0.011073092,
-0.038647823,
-0.01590583,
0.0008562756,
0.03479237,
0.0039463183,
-0.020063022,
-0.048164852,
0.026510539,
-0.061183933,
-0.046969693,
0.02144617,
-0.048452575,
0.02205527,
0.015723849,
0.056344535,
0.055321235,
0.037136998,
-0.08872732,
0.011813868,
0.0064246035,
-0.020590257,
-0.059401207,
0.012338125,
-2.4301395e-33,
0.068363585,
-0.05303797,
0.011494271,
0.06953355,
0.013304427,
0.0020351785,
-0.020783585,
0.028951883,
0.034663863,
-0.03274387,
0.00095708756,
0.008672852,
0.007618213,
-0.024579093,
0.030253874,
-0.034167152,
-0.0315152,
0.1105276,
0.03499844,
0.045135163,
0.00044455956,
0.051429555,
0.015050582,
-0.009024664,
0.023132037,
0.05141033,
-0.00417506,
0.004720958,
-0.016197585,
-0.025692327,
-0.024077175,
-0.00953031,
0.05060433,
-0.058328744,
0.04903431,
0.07964924,
0.03599398,
-0.065374464,
-0.035382472,
-0.07028972,
-0.009750123,
-0.031909473,
-0.04101604,
-0.041144423,
-0.036323845,
0.06685511,
0.016679594,
-0.048498012,
-0.015474575,
-0.00048608257,
0.03267068,
-0.010890426,
0.016646467,
-0.057286758,
0.008073807,
0.008808943,
-0.061580453,
-0.010815387,
0.0717443,
0.08607838,
0.014073375,
0.014896061,
-0.098295614,
-0.046653833,
0.033601493,
0.0647405,
-0.007525925,
0.025440095,
0.04171436,
-0.033113986,
-0.014553822,
0.024878975,
0.045614205,
-0.042929318,
-0.040504646,
-0.06304663,
-0.022389242,
0.010583584,
-0.032525852,
-0.03146621,
0.0081922775,
0.021094568,
0.0095269885,
-0.08290188,
-0.021351986,
0.008777032,
0.060185786,
-0.062182017,
0.004518251,
0.05684528,
-0.013033095,
0.01867297,
-0.008998785,
-0.076766245,
0.051622886,
1.6926977e-33,
-0.12588808,
0.011676749,
-0.079886116,
0.02304184,
0.029238446,
0.08721121,
0.06906221,
0.032533444,
0.047794122,
0.13212898,
0.03129717,
-0.0125368,
0.0035920327,
-0.016413208,
-0.038557872,
0.016005918,
0.09166447,
0.047558285,
-0.054981478,
0.06797876,
0.017968502,
0.118666455,
-0.069318265,
0.043814093,
0.04150938,
-0.017812226,
0.051738504,
0.06795029,
0.080493495,
0.005386888,
0.08878265,
-0.036075104,
-0.07708273,
-0.09101018,
-0.09597232,
-0.0937606,
-0.06200779,
0.06722552,
-0.0006647803,
0.029067127,
0.08179574,
-0.06488274,
-0.050375167,
-0.002403243,
-0.026110265,
-0.007630271,
0.011972527,
-0.08573929,
0.04107404,
0.024723932,
-0.02222756,
-0.11560156,
0.006753066,
-0.04589066,
-0.06369223,
0.053635046,
0.005769477,
0.06325056,
0.0048679966,
-0.057087842,
0.041931894,
0.022344982,
-0.14709935,
0.026361033,
0.106274396,
-0.0059068515,
0.020035667,
0.034950804,
-0.03342695,
-0.03884034,
-0.076072656,
-0.11173452,
-0.038953967,
-0.10270519,
0.04714134,
-0.049391687,
0.074747935,
0.041724026,
-0.031083144,
0.0033830043,
0.055804495,
-0.031882074,
-0.02541756,
0.050101582,
0.035991114,
0.09143438,
-0.07581111,
-0.050589707,
0.0074097887,
-0.0014020415,
-0.05036443,
-0.0015289022,
0.005471816,
0.07689256,
0.014164922,
-1.8297508e-08,
0.029913928,
-0.057959806,
-0.06846765,
0.026196472,
-0.0035178436,
0.11374637,
0.056845777,
-0.09315407,
0.0027757618,
0.10895455,
-0.033027817,
0.005051668,
-0.043633904,
-0.048978273,
0.011912417,
0.059747256,
-0.08661686,
-0.052748058,
0.026321623,
0.042173225,
-0.0035451513,
0.03797019,
0.022595786,
-0.0614702,
0.01268269,
0.040893063,
-0.084825225,
0.041167296,
-0.038163006,
0.008364558,
0.01014753,
0.024994388,
-0.012504467,
-0.045078665,
0.0102669485,
-0.046302866,
0.061438397,
0.016235871,
-0.0011558776,
0.007455159,
-0.019448454,
-0.06798961,
0.05472832,
0.09646006,
-0.04711737,
0.060088705,
0.0030213061,
-0.08877283,
0.037262574,
-0.009947699,
0.0035697597,
-0.07833652,
0.02169359,
-0.013075168,
0.072521746,
-0.0649658,
-0.029920656,
-0.017777385,
0.033904497,
0.02919506,
0.08793891,
0.008437021,
0.064442866,
-0.01656208
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,92 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestOpenAICompatibility::test_openai_chat_completion_with_tools[openai_client-txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo?"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get weather information",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "City name"
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-496035259763",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user is asking about the weather in Tokyo. I need to use the get_weather function for that. The function requires the location parameter, which in this case is Tokyo. I should make sure to specify \"Tokyo\" as the location. Let me check if there are any other parameters needed, but no, the function only needs the location. So the tool call should be straightforward. I'll format the JSON correctly inside the tool_call tags.\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-959b557fa67e4134a2391f5d35e5d5ae",
"function": {
"arguments": "{\"location\": \"Tokyo\"}",
"name": "get_weather"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 117,
"prompt_tokens": 158,
"total_tokens": 275,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,92 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestOpenAICompatibility::test_openai_format_preserves_complex_schemas[openai_client-txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Process this data"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "process_data",
"description": "Process structured data",
"parameters": {
"type": "object",
"properties": {
"data": {
"$ref": "#/$defs/DataObject"
}
},
"$defs": {
"DataObject": {
"type": "object",
"properties": {
"values": {
"type": "array",
"items": {
"type": "number"
}
}
}
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-524ead18daad",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user wants me to process the data. Let me check the available tools. There's a function called process_data that takes an object with a 'data' parameter. The data is an array of numbers. But the user hasn't provided any specific data yet. They just said \"Process this data.\" Hmm, maybe they expect me to prompt them for the data first. Wait, maybe there's a misunderstanding. Did they include the data in the conversation history? Let me look back. The user's message is \"Process this data.\" No data provided. Oh, maybe they made a mistake and forgot to include it. I need to ask them to provide the data so I can proceed. Let me confirm if there's any data mentioned. No, the current input is just the instruction. So I should ask the user to supply the data array of numbers to process.\n</think>\n\nPlease provide the structured data you'd like me to process. For example, an array of numbers like `[1, 2, 3]`.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 212,
"prompt_tokens": 180,
"total_tokens": 392,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
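
The recording above verifies that $ref/$defs constructs in a tool's parameter schema pass through the OpenAI-compatible endpoint intact. A sketch of the same request shape, with the schema copied from the recorded body and the connection details assumed:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="none")  # assumed setup

tool = {
    "type": "function",
    "function": {
        "name": "process_data",
        "description": "Process structured data",
        "parameters": {
            "type": "object",
            "properties": {"data": {"$ref": "#/$defs/DataObject"}},
            "$defs": {
                "DataObject": {
                    "type": "object",
                    "properties": {
                        "values": {"type": "array", "items": {"type": "number"}},
                    },
                },
            },
        },
    },
}

resp = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[{"role": "user", "content": "Process this data"}],
    max_tokens=4096,
    tools=[tool],
)
# In the recording the model stops without a tool call and asks for the data,
# so finish_reason is "stop" and tool_calls is empty.
print(resp.choices[0].finish_reason)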

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:05.205358-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}
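
The /api/ps recordings such as the one above are Ollama process-status probes the harness captures alongside each embeddings test. A minimal sketch of the same probe, mirroring the recorded method and empty body (a local Ollama server and the requests library are assumed):

import requests

# POST with an empty JSON body, exactly as captured in the recording.
resp = requests.post("http://localhost:11434/api/ps", json={})
for m in resp.json()["models"]:
    d = m["details"]
    print(m["name"], d["parameter_size"], d["quantization_level"])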

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:11.182572-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Test encoding format",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.019109152,
-0.0205217,
-0.071471564,
-0.023057504,
-0.06572786,
-0.0057331678,
-0.029395059,
-0.031822033,
-0.015748156,
-0.039123703,
0.02694331,
-0.0641754,
0.013510709,
0.050364953,
-0.03114308,
-0.08322274,
-0.03192984,
0.074970365,
-0.016377378,
-0.0013804765,
0.03850419,
-0.03441017,
-0.0048610102,
-0.03094053,
0.051915165,
0.009193639,
0.0071807485,
0.066353165,
0.024559105,
-0.04767663,
0.0376255,
-0.042586852,
0.078906916,
0.04827334,
0.13389648,
0.013978803,
0.03242126,
-0.08890431,
-0.014188366,
0.03553346,
-0.02476171,
-0.028628638,
0.047652308,
0.026259335,
0.048472118,
0.06663718,
-0.013584004,
0.071824096,
-0.073066786,
-0.050326068,
0.0039502876,
0.03300394,
-0.047816053,
-0.017657546,
0.010284664,
-0.10525716,
-0.010034394,
0.014627846,
-0.053289402,
0.060343288,
-0.10079798,
0.011359217,
-0.007258805,
0.05346498,
-0.0068726647,
0.03697505,
0.024016414,
0.023924585,
-0.011357761,
-0.119573325,
-0.115692526,
-0.06673285,
-0.04233929,
0.09302018,
0.02486003,
0.084047645,
0.0030104683,
-0.06605523,
0.027435688,
-0.032412402,
-0.025584543,
-0.06590182,
0.067799605,
0.0976311,
0.07360619,
0.034108408,
0.056534845,
0.076705806,
-0.05179011,
0.053681813,
0.0054462817,
0.015972052,
0.0035656213,
0.06333522,
-0.01597322,
0.05295729,
0.11539089,
0.055200845,
0.037667733,
0.08083974,
0.035557732,
-0.07982552,
-0.012100598,
-0.07612801,
-0.0695667,
-0.017815348,
0.16996554,
-0.0048157335,
0.09073964,
-0.07196438,
0.020009195,
-0.05956153,
-0.06312686,
-0.07716358,
0.0150949685,
-0.050339524,
-0.05444592,
-0.023078114,
-0.035431463,
-0.030625492,
-0.053284056,
-0.06745872,
-0.08049862,
0.002800386,
-0.0114065055,
-0.029938627,
0.024243163,
-1.5107368e-33,
-0.02984805,
-0.00033025863,
0.0030491,
0.023082128,
-0.04808977,
-0.0027841914,
-0.037461873,
0.016201235,
-0.02998979,
0.015712254,
0.009664366,
-0.03984875,
-0.029493092,
0.03837007,
-0.005226541,
0.06857773,
-0.007891026,
-0.0019036188,
-0.035219382,
0.03627955,
0.05867878,
0.023777487,
0.044425115,
-0.025999734,
-0.025318418,
-0.02685328,
-0.02368557,
-0.094386704,
0.0016880591,
0.0065193563,
-0.09711005,
-0.053493332,
-0.08241291,
0.023502836,
-0.02407441,
0.015992055,
0.0050546136,
0.030476829,
-0.088438906,
0.11427086,
0.028378993,
0.02985018,
0.022821706,
0.018776013,
0.056330692,
-0.020254886,
-0.00070521404,
-0.0864014,
0.020228866,
-0.0039839754,
0.0010032665,
0.065425254,
-0.036518592,
0.032341316,
0.023112345,
0.044507477,
0.09644409,
-0.07272818,
0.03370691,
0.042783204,
-0.052776046,
0.0003352446,
0.061005518,
-0.019623613,
-0.023274273,
-0.11602989,
0.007926991,
-0.12529127,
0.017030548,
0.013484081,
-0.030528491,
-0.024298145,
0.006284904,
-0.015568167,
-0.072781205,
0.012985074,
0.015977127,
0.0051657534,
-0.0026022948,
-0.059578825,
0.06372584,
-0.0019363016,
0.018695941,
-0.009242735,
-0.05887247,
-0.032524884,
-0.009591115,
-0.047377545,
0.020585002,
-0.007134836,
0.050135154,
0.016087264,
-0.0058878902,
-0.07661024,
0.0820671,
1.6053074e-33,
-0.0056476775,
0.06719423,
-0.011510322,
0.05586423,
-0.08886697,
-0.036528286,
0.12134926,
0.028969096,
0.022419011,
0.047327086,
0.07621525,
-0.07937209,
0.0020504447,
-0.023489932,
-0.029759271,
-0.04879825,
-0.034876924,
0.06461666,
0.051493492,
0.008284975,
-0.031793926,
0.098015875,
0.008122038,
0.01032072,
0.059404474,
0.05176487,
0.042960417,
0.0069373515,
0.027306866,
0.039226852,
0.062416088,
0.051797673,
0.0053232666,
0.05965781,
-0.008935817,
-0.0135501,
0.08726531,
0.028408607,
-0.006820522,
0.052098107,
0.049510423,
0.055176627,
-0.016774576,
0.077848226,
0.026121203,
0.031311177,
0.011812256,
-0.0341528,
0.052825138,
0.003484205,
0.09811821,
0.029693138,
-0.031354938,
-0.012068096,
0.018686052,
-0.032609653,
-0.09638639,
0.033928476,
-0.07897009,
-0.008300913,
-0.04915284,
0.02006342,
0.061743837,
-0.018412542,
-0.033583082,
-0.090903476,
0.021116566,
-0.022445552,
-0.011814237,
-0.048816226,
0.048287436,
-0.07294675,
-0.02198573,
0.062477604,
0.023308119,
-0.052141402,
-0.05409648,
0.062339973,
0.052301563,
0.051384836,
-0.02426406,
-0.018824687,
-0.01660311,
0.09330242,
0.008502433,
0.063408315,
0.019377569,
0.047027417,
-0.0058769877,
-0.0034505578,
0.07956527,
0.10210641,
0.015302805,
0.04089992,
0.038895626,
-1.2710905e-08,
-0.019304764,
-0.1217849,
-0.047983564,
-0.053382736,
-0.113197215,
0.05181196,
-0.10498226,
-0.08524135,
0.0061870585,
-0.029899841,
0.064561576,
-0.028730206,
-0.064735174,
-0.024887148,
0.0026119591,
-0.008796896,
0.030246036,
0.009807871,
0.0044631795,
0.0851423,
-0.026132204,
0.11360852,
-0.0045760865,
-0.036643907,
-0.09078616,
0.081466354,
0.012066122,
0.07288108,
0.004079195,
-0.05064171,
0.068772145,
0.029108258,
0.014786602,
-0.11868081,
-0.05042858,
0.05376578,
0.04570744,
0.074074544,
0.028540619,
0.03937392,
0.0291862,
-0.035710927,
-0.09132387,
-0.047720414,
-0.00082342024,
-0.073688805,
0.011024812,
0.015703982,
-0.03590976,
-0.08121826,
0.020365681,
-0.045287356,
-0.024955628,
0.001167751,
0.00037544646,
-0.026392939,
-0.032434102,
0.003407464,
-0.007060387,
0.024250468,
0.076347135,
0.039537415,
0.036043648,
-0.07085338
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 3,
"total_tokens": 3
}
}
},
"is_streaming": false
}
}

View file

@@ -1,54 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Test dimensions parameter",
"encoding_format": "base64",
"dimensions": 16
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.25369987,
0.016355688,
-0.29676768,
0.316427,
-0.18642858,
0.076206245,
-0.031503417,
0.29860005,
-0.496603,
-0.36621967,
0.25334543,
-0.333392,
0.005993569,
0.14079759,
-0.13775977,
-0.14680246
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 3,
"total_tokens": 3
}
}
},
"is_streaming": false
}
}
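
The recording above exercises the dimensions parameter, which truncates the returned embedding to the requested width (16 here, versus the 384 dimensions all-minilm:l6-v2 produces natively). A hedged sketch follows; the recorded request used base64 encoding, while float is used below so the vector length is directly inspectable:

from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="unused")  # assumed setup

resp = client.embeddings.create(
    model="all-minilm:l6-v2",
    input="Test dimensions parameter",
    encoding_format="float",  # recording used base64; float keeps the list inspectable
    dimensions=16,
)
print(len(resp.data[0].embedding))  # 16, matching the recording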

View file

@@ -0,0 +1,98 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_with_tools[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:tool_calling]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"max_tokens": 4096,
"stream": false,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-744052775cf9",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user is asking about the weather in San Francisco, CA. I need to use the get_weather function. The function requires the location parameter, which is provided as San Francisco, CA. I should make sure to format the arguments correctly as a JSON object. Let me check the required parameters again. The location is required, so I can't omit it. I'll structure the tool call with the name \"get_weather\" and the arguments including \"location\": \"San Francisco, CA\". That should get the current weather information for the user.\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-b59dc311dd914d3dbd6d455b122bc39c",
"function": {
"arguments": "{\"location\": \"San Francisco, CA\"}",
"name": "get_weather"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 138,
"prompt_tokens": 185,
"total_tokens": 323,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
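
The recording above is the standard tool-calling path: tool_choice "auto" with a single get_weather function. A sketch of the call and of parsing the returned arguments; the request body mirrors the recording, the client setup is assumed:

import json

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="none")  # assumed setup

resp = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[
        {"role": "system", "content": "Pretend you are a weather assistant."},
        {"role": "user", "content": "What's the weather like in San Francisco, CA?"},
    ],
    max_tokens=4096,
    stream=False,
    tool_choice="auto",
    tools=[{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state (both required), e.g. San Francisco, CA.",
                    },
                },
                "required": ["location"],
            },
        },
    }],
)

# Arguments arrive as a JSON string; the recording shows {"location": "San Francisco, CA"}
args = json.loads(resp.choices[0].message.tool_calls[0].function.arguments)
print(args["location"])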

View file

@@ -0,0 +1,67 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_with_tool_choice_none[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:tool_calling]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"max_tokens": 4096,
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-77cf21828360",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user is asking about the weather in San Francisco, CA. I need to check the current weather conditions. But wait, I can't access real-time data. I should mention that I can't provide the current weather forecast and ask them to check a reliable source like the National Weather Service or a weather app. Also, maybe suggest they can provide more details if they need help with something else related to the weather.\n</think>\n\nI'm sorry, but I can't provide real-time weather information. However, you can check the current weather for San Francisco, CA using the National Weather Service (NWS) website, weather apps like Weather.com, or local meteorological services. Let me know if there's anything else I can assist with!",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 154,
"prompt_tokens": 33,
"total_tokens": 187,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:11.451164-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:11.182572-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:10.878462-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,128 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestEdgeCases::test_multiple_tools_with_different_schemas[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Use one of the available tools"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "simple",
"parameters": {
"type": "object",
"properties": {
"x": {
"type": "string"
}
}
}
}
},
{
"type": "function",
"function": {
"name": "complex",
"parameters": {
"type": "object",
"properties": {
"data": {
"$ref": "#/$defs/Complex"
}
},
"$defs": {
"Complex": {
"type": "object",
"properties": {
"nested": {
"type": "array",
"items": {
"type": "number"
}
}
}
}
}
}
}
},
{
"type": "function",
"function": {
"name": "with_output",
"parameters": {
"type": "object",
"properties": {
"input": {
"type": "string"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-853f6a700b98",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, let's see. The user wants me to use one of the available tools. The tools provided are simple, complex, and with_output. The simple function takes an argument 'x' of type string. The complex function requires a 'data' parameter that's an object with a nested array of numbers. The with_output function takes an input string.\n\nThe user's query is about using a tool, but there's no specific function name mentioned. Wait, maybe the user expects me to choose one based on the context. Since the tools are available, but the query is a general instruction, perhaps I should ask for clarification. However, the instructions say to use one of the tools if possible. Since the user hasn't specified a particular function, maybe I should check if there's any implicit function needed. But looking at the tools, none are directly related to the query. The user might need to specify which tool to use. Alternatively, maybe the answer is to call the simple function with an example input. But without more context, it's hard to tell. Wait, maybe the user expects me to choose the simplest one. Let's go with the simple function first. So the tool call would be to the simple function with x set to some value. But the user hasn't provided a specific value. Maybe I should state that the tool requires a value. But according to the instructions, if possible, use one of the tools. Since the user hasn't given a value, perhaps the answer is to call the simple function with an example. But the parameters for the simple function require 'x' which is a string. Maybe the user expects me to proceed without needing more info. So I'll proceed by calling the simple function with x as \"example\".\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-12e2ba0189cf484bb936cbb254a5c32a",
"function": {
"arguments": "{\"x\": \"example\"}",
"name": "simple"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 378,
"prompt_tokens": 265,
"total_tokens": 643,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:05.320154-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:11.451164-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:05.489695-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:06.037921-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,103 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestMCPToolsInChatCompletion::test_mcp_tools_in_inference[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Calculate 5 + 3"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "calculate",
"description": "",
"parameters": {
"properties": {
"x": {
"title": "X",
"type": "number"
},
"y": {
"title": "Y",
"type": "number"
},
"operation": {
"title": "Operation",
"type": "string"
}
},
"required": [
"x",
"y",
"operation"
],
"title": "calculateArguments",
"type": "object"
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-99bf0054f11a",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user wants to calculate 5 plus 3. Let me check the tools provided. The only function available is 'calculate', which requires x, y, and operation. The parameters are numbers and an operation. The user input is straightforward: 5 + 3. So I need to call the 'calculate' function with x=5, y=3, and operation='+'. That should give the correct result. I don't see any other parameters needed here. Just make sure the JSON is correctly formatted with the required fields.\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-6d1a92899a8246bb8fae5682dc08590c",
"function": {
"arguments": "{\"x\": 5, \"y\": 3, \"operation\": \"+\"}",
"name": "calculate"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 144,
"prompt_tokens": 193,
"total_tokens": 337,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:11.182572-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:32:10.118228-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "This is the first text",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.0010839553,
0.067364,
0.015185306,
0.037240896,
0.029337138,
0.015160007,
0.0743005,
-0.0032980628,
0.06581814,
-0.021851996,
0.034412965,
0.051005766,
-0.011422501,
-0.025062356,
-0.051756065,
0.027193472,
0.07849549,
-0.05999108,
0.010471458,
-0.003400683,
0.043449093,
0.122919865,
9.668583e-05,
0.002153268,
0.018064681,
0.045069378,
-0.09762388,
0.11186886,
0.049657565,
-0.03485217,
-0.039568134,
0.003532146,
0.15894793,
0.06341193,
0.047953114,
0.011617699,
0.009799243,
0.015377702,
0.009379663,
0.079989135,
0.019207356,
-0.13718612,
0.01730099,
0.013687199,
0.014266827,
-0.00022628276,
-0.017710257,
0.02291068,
0.03590651,
-0.015361055,
-0.00978436,
-0.0401825,
-0.011481894,
0.00014050963,
0.08540761,
0.08730027,
0.0046967245,
0.006164595,
0.003031956,
0.008891807,
-0.006260525,
0.05061661,
0.0005252785,
0.0467754,
0.09363822,
-0.012814104,
0.017708639,
-0.062698044,
-0.11535818,
0.041123625,
-0.014939021,
0.044815876,
-0.020868087,
0.042999975,
-0.061038766,
0.019998673,
-0.068740115,
-0.035516046,
0.041884515,
0.012185281,
-0.029084096,
-0.06643917,
0.030638866,
0.05149607,
-0.12815061,
0.06821646,
-0.047070153,
-0.032925386,
0.007499353,
-0.017841771,
0.038296465,
-0.015792726,
0.07054022,
0.038072467,
-0.11428876,
0.04210153,
-0.11162366,
-0.045723915,
-0.028951947,
0.12735675,
-0.013946637,
-0.027157523,
0.07295939,
0.024098422,
-0.054050542,
-0.13125896,
0.03013205,
-0.023223283,
-0.019072957,
-0.007864101,
-0.021954412,
-0.05329901,
-0.07088355,
-0.0115214065,
-0.023399564,
-0.015638318,
0.05148062,
0.029261008,
0.06481798,
0.064031154,
0.014445124,
-0.058017716,
-0.069921836,
-0.023950975,
-0.08490842,
-0.08779567,
0.048162255,
-6.1240354e-33,
0.010315817,
0.038685724,
0.0031864564,
0.0357421,
0.0050265454,
-0.004210234,
-0.053900674,
-0.02988569,
-0.07548199,
-0.078777455,
-0.012271205,
-0.05056629,
0.020729113,
-0.051866043,
-0.059254467,
-0.059903424,
-0.055699438,
0.032196835,
-0.006328442,
-0.021668624,
-0.059921067,
0.0519611,
0.051227964,
-0.063502096,
-0.04873505,
-0.014265467,
0.0025537873,
-0.024346355,
-0.0055181426,
0.02007461,
-0.10196586,
0.010727814,
-0.023194604,
-0.081025146,
-0.014997581,
0.0017926424,
0.045078833,
-0.052792255,
-0.05368693,
-0.013245513,
-0.019808132,
0.020031843,
-0.00081401254,
-0.10117647,
-0.0007066768,
0.09663035,
-0.03946875,
0.04954661,
0.042237334,
0.007943922,
-0.05234212,
0.051887065,
0.03711589,
0.034850314,
0.063441575,
-0.026583876,
-0.009227281,
-0.0025737104,
-0.056082893,
0.0020716325,
-0.020129146,
0.0012315192,
-0.0017609745,
0.019111704,
0.016572498,
-0.011374,
0.010381644,
-0.007864189,
0.04664868,
-0.046856377,
-0.08523834,
-0.008974813,
0.012022968,
0.013285977,
0.015182303,
0.03708482,
0.026587088,
0.014473839,
-0.013946565,
0.01999883,
-0.06888259,
-0.07111367,
0.012369427,
0.032828625,
-0.03152666,
0.045777358,
0.06801705,
-0.07747748,
0.018461134,
0.06620267,
-0.086365156,
0.008950603,
0.041320425,
0.009541193,
0.0066037327,
4.71081e-33,
-0.026172558,
0.0013145636,
-0.014140948,
-0.024360213,
0.06931815,
0.031448748,
0.037257418,
0.06468137,
0.049403396,
0.11072201,
0.04985356,
0.06679111,
0.04153249,
-0.034106053,
0.070283465,
0.034855895,
0.12902643,
-0.021033453,
0.008940618,
0.030177405,
-0.022881329,
0.036504544,
-0.13194299,
0.045612644,
-0.0127895875,
0.04174139,
0.1232064,
-0.013484046,
-0.007285246,
-0.029776007,
0.025007037,
-0.009516822,
0.02475585,
0.023208592,
-0.019141924,
0.02259424,
0.013740329,
-0.038490705,
-0.014461541,
0.075218394,
0.13589163,
0.009839605,
-0.037563317,
-0.02737327,
-0.016485116,
-0.048845276,
-0.03523722,
-0.05439929,
-0.0017957076,
0.03563579,
-0.010255764,
-0.01859244,
-0.03647324,
-0.055985246,
-0.007833892,
0.009086756,
-0.007333394,
0.050386623,
-0.0002305643,
-0.03637248,
-0.024937423,
0.058877032,
-0.07250415,
0.07401245,
0.053917013,
-0.051895224,
-0.006332244,
0.07850189,
-0.01695057,
-0.006673017,
0.012659739,
-0.014127065,
-0.13639799,
-0.08524976,
-0.017533274,
-0.0046930755,
0.013687301,
0.0009185522,
-0.0719948,
-0.06887779,
0.14208324,
0.03187123,
-0.055919908,
0.030401653,
0.061900012,
0.029921472,
-0.00096237566,
-0.065010294,
-0.020657646,
0.039562404,
-0.123846576,
0.0028867351,
0.051196404,
0.13397509,
-0.088453874,
-1.7590333e-08,
-0.025786474,
-0.080303885,
-0.09164947,
0.031999,
0.00584884,
0.11464121,
0.023377793,
-0.06902527,
-0.055941124,
-0.05787791,
0.014640494,
0.080320895,
0.0037027278,
-0.030824674,
0.024432683,
0.008549355,
-0.05291309,
-0.06636625,
0.0007468212,
-0.02379191,
0.030766092,
0.054053318,
-0.0027251292,
-0.09928475,
-0.0150488615,
0.016240431,
-0.0015727071,
0.01190173,
0.007895162,
0.04894733,
0.00487708,
0.08263861,
-0.014527478,
-0.043879665,
0.004633697,
0.024611989,
0.023827499,
0.02366802,
0.050754935,
-0.051841788,
0.0212632,
-0.0034418616,
-0.021175656,
0.020591663,
-0.06475325,
0.0542002,
0.027792262,
-0.05295982,
0.01509645,
-0.11977527,
-0.03416359,
-0.012206606,
0.047451705,
0.020876253,
-0.026368074,
0.01502373,
0.033982284,
0.059788153,
-0.052526973,
0.03356499,
0.061180886,
0.096336305,
0.116353564,
-0.016122948
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5
}
}
},
"is_streaming": false
}
}
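
This recording pairs an embeddings request with its full float vector. Ollama serves an OpenAI-compatible API under `/v1` (the doubled `/v1/v1` in the stored URL is the recorder concatenating the client's base URL with the endpoint path), so the stock `openai` client reproduces it. A sketch, with the API key as a placeholder since the local server ignores it:

```python
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="unused")

resp = client.embeddings.create(
    model="all-minilm:l6-v2",
    input="This is the first text",
    encoding_format="float",
)
vec = resp.data[0].embedding
# all-MiniLM-L6-v2 produces 384-dimensional vectors; the recording above
# reports 5 prompt tokens for this input.
print(len(vec), resp.usage.prompt_tokens)
```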

View file

@ -1,57 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:05.395473-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,114 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestChatCompletionWithTools::test_tool_with_complex_schema[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Book a flight from SFO to JFK for John Doe"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "book_flight",
"description": "Book a flight",
"parameters": {
"type": "object",
"properties": {
"flight": {
"$ref": "#/$defs/FlightInfo"
},
"passenger": {
"$ref": "#/$defs/Passenger"
}
},
"required": [
"flight",
"passenger"
],
"$defs": {
"FlightInfo": {
"type": "object",
"properties": {
"from": {
"type": "string"
},
"to": {
"type": "string"
},
"date": {
"type": "string",
"format": "date"
}
}
},
"Passenger": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"age": {
"type": "integer"
}
}
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-cfb292c0f41d",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user wants to book a flight from SFO to JFK for John Doe. Let me check the tools available. The provided function is book_flight, which requires flight information and a passenger. The parameters needed are flight (as a FlightInfo object) and passenger (with name and age). The user mentioned SFO to JFK, so the flight details are from and to. The passenger's name is John Doe, but the age isn't provided. Wait, the function parameters require the passenger's name and age, but the user only mentioned the name. Maybe the age is missing? But the user didn't specify it, so perhaps I should note that the age is required. However, the function's required parameters are flight and passenger, so even if age is missing, the function can't be called without it. So I need to include both flight info and passenger details. The user's message only gives the name and destination, not the flight details or age. Therefore, I need to ask for the flight details and the passenger's age. But the user hasn't provided those. So I can't proceed with the function call. Wait, but maybe the user expects me to assume some default? No, the function requires all parameters. Since the user hasn't provided flight details or age, I can't call the function. So the correct response is to prompt the user for those details.\n</think>\n\nThe booking requires the flight details and passenger's age. Could you provide the flight number and John Doe's age?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 310,
"prompt_tokens": 261,
"total_tokens": 571,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
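
This new vLLM recording exercises a tool whose parameter schema uses JSON Schema `$defs`/`$ref` indirection; note that the recorded model declined to call the tool and answered in prose instead, so the test presumably has to tolerate both outcomes. A sketch of issuing the recorded request with the `openai` client (endpoint and schema copied from the recording; the client setup is an assumption):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

book_flight = {
    "type": "function",
    "function": {
        "name": "book_flight",
        "description": "Book a flight",
        "parameters": {
            "type": "object",
            "properties": {
                "flight": {"$ref": "#/$defs/FlightInfo"},
                "passenger": {"$ref": "#/$defs/Passenger"},
            },
            "required": ["flight", "passenger"],
            "$defs": {
                "FlightInfo": {
                    "type": "object",
                    "properties": {
                        "from": {"type": "string"},
                        "to": {"type": "string"},
                        "date": {"type": "string", "format": "date"},
                    },
                },
                "Passenger": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "age": {"type": "integer"},
                    },
                },
            },
        },
    },
}

resp = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[{"role": "user", "content": "Book a flight from SFO to JFK for John Doe"}],
    max_tokens=4096,
    tools=[book_flight],
)
msg = resp.choices[0].message
# The recorded run returned no tool_calls and a plain-text follow-up
# question, so handle both branches.
print(msg.tool_calls or msg.content)
```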

View file

@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Hello, world!",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.038168654,
0.032873917,
-0.0055947267,
0.014366432,
-0.040310103,
-0.116643615,
0.031721067,
0.0019260457,
-0.04255802,
0.029198613,
0.04252229,
0.032184314,
0.029838374,
0.010959321,
-0.053805783,
-0.05028783,
-0.023449864,
0.0107550435,
-0.13774979,
0.0039929547,
0.029302042,
0.066712305,
-0.015410682,
0.048422653,
-0.08814465,
-0.012715775,
0.041334823,
0.040851083,
-0.050064698,
-0.05804616,
0.048728727,
0.06888658,
0.058795262,
0.008804153,
-0.016073612,
0.08514259,
-0.078146815,
-0.07741974,
0.020842256,
0.016201088,
0.032518543,
-0.05346469,
-0.062197812,
-0.024271712,
0.007416788,
0.024103774,
0.006469804,
0.051166162,
0.07284196,
0.034627657,
-0.05475476,
-0.059386417,
-0.0071934434,
0.020163197,
0.035816014,
0.0055927313,
0.010762318,
-0.05274177,
0.010083032,
-0.008742163,
-0.06284565,
0.038426206,
-0.013933317,
0.07342759,
0.09004579,
-0.07995627,
-0.016420787,
0.044767782,
-0.06886435,
-0.03303916,
-0.015482072,
0.011322529,
0.036461752,
0.066346884,
-0.05434455,
0.008740993,
0.012066104,
-0.038101126,
0.0069316486,
0.051146947,
0.07740579,
-0.122950904,
0.016380342,
0.049568996,
0.031634904,
-0.039637603,
0.0016715266,
0.009577405,
-0.032646418,
-0.033988595,
-0.13329837,
0.0072566303,
-0.010266605,
0.038557075,
-0.09338859,
-0.041706774,
0.069941126,
-0.026323376,
-0.14971305,
0.13445398,
0.03748492,
0.052825302,
0.0450506,
0.018712776,
0.05444322,
0.017282845,
-0.032480195,
0.04614526,
-0.046711974,
-0.030566413,
-0.01820007,
-0.04869831,
0.033051647,
-0.0038142777,
0.04999665,
-0.058270358,
-0.010011706,
0.010643473,
-0.040113144,
-0.0015507729,
0.060854245,
-0.045562096,
0.049257778,
0.02612153,
0.01981428,
-0.001660993,
0.059509434,
-6.525298e-33,
0.063519135,
0.0030875143,
0.028961418,
0.1733713,
0.0029763067,
0.027727291,
-0.0951315,
-0.031186627,
0.026689058,
-0.010807322,
0.023850724,
0.023777472,
-0.031174092,
0.049501278,
-0.025049716,
0.10175924,
-0.07919064,
-0.0032249284,
0.042915843,
0.09483459,
-0.06652636,
0.006303593,
0.02220902,
0.06999181,
-0.0074810013,
-0.0017734945,
0.027008688,
-0.07534615,
0.114036545,
0.008552313,
-0.023737878,
-0.04694563,
0.014472103,
0.019855395,
-0.0046694353,
0.0013555645,
-0.034298304,
-0.054142635,
-0.09419824,
-0.028909719,
-0.018876282,
0.0457315,
0.04761082,
-0.0030971593,
-0.033264168,
-0.013539523,
0.051041685,
0.031110944,
0.015244497,
0.054158635,
-0.08499706,
0.013360703,
-0.04759633,
0.07101136,
-0.0131114535,
-0.0023818254,
0.050331973,
-0.041642286,
-0.01419894,
0.032463223,
0.0053973934,
0.091275506,
0.0044798073,
-0.018260129,
-0.015278888,
-0.046306957,
0.038750377,
0.014729783,
0.05204642,
0.0017938613,
-0.014963651,
0.027101943,
0.031203475,
0.023725478,
-0.004601222,
0.03617344,
0.06679477,
-0.0018401983,
0.021265576,
-0.057589985,
0.019155758,
0.031437635,
-0.018444614,
-0.04085069,
0.10393101,
0.011960795,
-0.014898805,
-0.10520497,
-0.012302656,
-0.00043837292,
-0.09508398,
0.058318105,
0.042576887,
-0.025066672,
-0.094555676,
4.0072287e-33,
0.1322281,
0.0053512393,
-0.03312536,
-0.09096454,
-0.031562407,
-0.033949774,
-0.07205118,
0.1259232,
-0.08333555,
0.052797858,
0.001077506,
0.022004265,
0.10402767,
0.013034249,
0.04091762,
0.018705815,
0.11424037,
0.024799824,
0.014582492,
0.006205516,
-0.011202356,
-0.035756435,
-0.03800272,
0.011251353,
-0.0512988,
0.007890417,
0.06736164,
0.0033359542,
-0.09285096,
0.03704081,
-0.022326592,
0.039967872,
-0.030748183,
-0.011446819,
-0.014453254,
0.02498229,
-0.097532175,
-0.035378877,
-0.03757795,
-0.010181498,
-0.06392041,
0.025538994,
0.02061816,
0.03757256,
-0.1043548,
-0.028326731,
-0.05209465,
0.0128473425,
-0.051238894,
-0.029034877,
-0.09633617,
-0.042309195,
0.067165054,
-0.030870603,
-0.010357507,
0.027381465,
-0.028105576,
0.010302046,
0.04306986,
0.022315372,
0.007954779,
0.056068663,
0.04071972,
0.09293905,
0.016536433,
-0.053764775,
0.00047211433,
0.050708972,
0.042510226,
-0.029195962,
0.009274875,
-0.010647389,
-0.037209682,
0.002267011,
-0.030304702,
0.0745741,
0.0026207205,
-0.017582772,
0.0028797672,
0.038404796,
0.00723137,
0.045613218,
0.03998252,
0.014209623,
-0.0142997475,
0.05850862,
0.03630791,
0.055294298,
-0.020075988,
-0.08041808,
-0.030250112,
-0.014920701,
0.022349516,
0.011911506,
-0.06903851,
-1.8806734e-08,
-0.078480355,
0.046674173,
-0.023920896,
0.0634942,
0.02396477,
0.0014517035,
-0.090798445,
-0.06684978,
-0.0801405,
0.005503192,
0.053675175,
0.104841895,
-0.066848256,
0.015522683,
0.067097165,
0.070832625,
-0.03197915,
0.020843629,
-0.0219202,
-0.0073016756,
-0.010645817,
0.0040983153,
0.03313765,
-0.0790081,
0.03878132,
-0.075230986,
-0.015732396,
0.0060099233,
0.0051297406,
-0.061492138,
0.04202211,
0.09544608,
-0.04318599,
0.014424486,
-0.10617826,
-0.027963417,
0.011034413,
0.069576606,
0.06689785,
-0.07479674,
-0.07851099,
0.042766396,
-0.034639932,
-0.10607304,
-0.03577663,
0.051540814,
0.068673156,
-0.049959548,
0.015460458,
-0.064520314,
-0.076010585,
0.026035817,
0.07440218,
-0.012396022,
0.13329679,
0.074770845,
0.05134284,
0.020977058,
-0.026776016,
0.08894323,
0.039937407,
-0.04102053,
0.03194075,
0.018113315
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 4,
"total_tokens": 4
}
}
},
"is_streaming": false
}
}

View file

@ -1,48 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"prompt": "I am feeling really sad today.",
"stream": false
},
"endpoint": "/v1/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "rec-d2ba309413e8",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": " I have been working on a project that I feel like I'm not doing well",
"stop_reason": null,
"prompt_logprobs": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 16,
"prompt_tokens": 7,
"total_tokens": 23,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"service_tier": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Hello, world!",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.038168654,
0.032873917,
-0.0055947267,
0.014366432,
-0.040310103,
-0.116643615,
0.031721067,
0.0019260457,
-0.04255802,
0.029198613,
0.04252229,
0.032184314,
0.029838374,
0.010959321,
-0.053805783,
-0.05028783,
-0.023449864,
0.0107550435,
-0.13774979,
0.0039929547,
0.029302042,
0.066712305,
-0.015410682,
0.048422653,
-0.08814465,
-0.012715775,
0.041334823,
0.040851083,
-0.050064698,
-0.05804616,
0.048728727,
0.06888658,
0.058795262,
0.008804153,
-0.016073612,
0.08514259,
-0.078146815,
-0.07741974,
0.020842256,
0.016201088,
0.032518543,
-0.05346469,
-0.062197812,
-0.024271712,
0.007416788,
0.024103774,
0.006469804,
0.051166162,
0.07284196,
0.034627657,
-0.05475476,
-0.059386417,
-0.0071934434,
0.020163197,
0.035816014,
0.0055927313,
0.010762318,
-0.05274177,
0.010083032,
-0.008742163,
-0.06284565,
0.038426206,
-0.013933317,
0.07342759,
0.09004579,
-0.07995627,
-0.016420787,
0.044767782,
-0.06886435,
-0.03303916,
-0.015482072,
0.011322529,
0.036461752,
0.066346884,
-0.05434455,
0.008740993,
0.012066104,
-0.038101126,
0.0069316486,
0.051146947,
0.07740579,
-0.122950904,
0.016380342,
0.049568996,
0.031634904,
-0.039637603,
0.0016715266,
0.009577405,
-0.032646418,
-0.033988595,
-0.13329837,
0.0072566303,
-0.010266605,
0.038557075,
-0.09338859,
-0.041706774,
0.069941126,
-0.026323376,
-0.14971305,
0.13445398,
0.03748492,
0.052825302,
0.0450506,
0.018712776,
0.05444322,
0.017282845,
-0.032480195,
0.04614526,
-0.046711974,
-0.030566413,
-0.01820007,
-0.04869831,
0.033051647,
-0.0038142777,
0.04999665,
-0.058270358,
-0.010011706,
0.010643473,
-0.040113144,
-0.0015507729,
0.060854245,
-0.045562096,
0.049257778,
0.02612153,
0.01981428,
-0.001660993,
0.059509434,
-6.525298e-33,
0.063519135,
0.0030875143,
0.028961418,
0.1733713,
0.0029763067,
0.027727291,
-0.0951315,
-0.031186627,
0.026689058,
-0.010807322,
0.023850724,
0.023777472,
-0.031174092,
0.049501278,
-0.025049716,
0.10175924,
-0.07919064,
-0.0032249284,
0.042915843,
0.09483459,
-0.06652636,
0.006303593,
0.02220902,
0.06999181,
-0.0074810013,
-0.0017734945,
0.027008688,
-0.07534615,
0.114036545,
0.008552313,
-0.023737878,
-0.04694563,
0.014472103,
0.019855395,
-0.0046694353,
0.0013555645,
-0.034298304,
-0.054142635,
-0.09419824,
-0.028909719,
-0.018876282,
0.0457315,
0.04761082,
-0.0030971593,
-0.033264168,
-0.013539523,
0.051041685,
0.031110944,
0.015244497,
0.054158635,
-0.08499706,
0.013360703,
-0.04759633,
0.07101136,
-0.0131114535,
-0.0023818254,
0.050331973,
-0.041642286,
-0.01419894,
0.032463223,
0.0053973934,
0.091275506,
0.0044798073,
-0.018260129,
-0.015278888,
-0.046306957,
0.038750377,
0.014729783,
0.05204642,
0.0017938613,
-0.014963651,
0.027101943,
0.031203475,
0.023725478,
-0.004601222,
0.03617344,
0.06679477,
-0.0018401983,
0.021265576,
-0.057589985,
0.019155758,
0.031437635,
-0.018444614,
-0.04085069,
0.10393101,
0.011960795,
-0.014898805,
-0.10520497,
-0.012302656,
-0.00043837292,
-0.09508398,
0.058318105,
0.042576887,
-0.025066672,
-0.094555676,
4.0072287e-33,
0.1322281,
0.0053512393,
-0.03312536,
-0.09096454,
-0.031562407,
-0.033949774,
-0.07205118,
0.1259232,
-0.08333555,
0.052797858,
0.001077506,
0.022004265,
0.10402767,
0.013034249,
0.04091762,
0.018705815,
0.11424037,
0.024799824,
0.014582492,
0.006205516,
-0.011202356,
-0.035756435,
-0.03800272,
0.011251353,
-0.0512988,
0.007890417,
0.06736164,
0.0033359542,
-0.09285096,
0.03704081,
-0.022326592,
0.039967872,
-0.030748183,
-0.011446819,
-0.014453254,
0.02498229,
-0.097532175,
-0.035378877,
-0.03757795,
-0.010181498,
-0.06392041,
0.025538994,
0.02061816,
0.03757256,
-0.1043548,
-0.028326731,
-0.05209465,
0.0128473425,
-0.051238894,
-0.029034877,
-0.09633617,
-0.042309195,
0.067165054,
-0.030870603,
-0.010357507,
0.027381465,
-0.028105576,
0.010302046,
0.04306986,
0.022315372,
0.007954779,
0.056068663,
0.04071972,
0.09293905,
0.016536433,
-0.053764775,
0.00047211433,
0.050708972,
0.042510226,
-0.029195962,
0.009274875,
-0.010647389,
-0.037209682,
0.002267011,
-0.030304702,
0.0745741,
0.0026207205,
-0.017582772,
0.0028797672,
0.038404796,
0.00723137,
0.045613218,
0.03998252,
0.014209623,
-0.0142997475,
0.05850862,
0.03630791,
0.055294298,
-0.020075988,
-0.08041808,
-0.030250112,
-0.014920701,
0.022349516,
0.011911506,
-0.06903851,
-1.8806734e-08,
-0.078480355,
0.046674173,
-0.023920896,
0.0634942,
0.02396477,
0.0014517035,
-0.090798445,
-0.06684978,
-0.0801405,
0.005503192,
0.053675175,
0.104841895,
-0.066848256,
0.015522683,
0.067097165,
0.070832625,
-0.03197915,
0.020843629,
-0.0219202,
-0.0073016756,
-0.010645817,
0.0040983153,
0.03313765,
-0.0790081,
0.03878132,
-0.075230986,
-0.015732396,
0.0060099233,
0.0051297406,
-0.061492138,
0.04202211,
0.09544608,
-0.04318599,
0.014424486,
-0.10617826,
-0.027963417,
0.011034413,
0.069576606,
0.06689785,
-0.07479674,
-0.07851099,
0.042766396,
-0.034639932,
-0.10607304,
-0.03577663,
0.051540814,
0.068673156,
-0.049959548,
0.015460458,
-0.064520314,
-0.076010585,
0.026035817,
0.07440218,
-0.012396022,
0.13329679,
0.074770845,
0.05134284,
0.020977058,
-0.026776016,
0.08894323,
0.039937407,
-0.04102053,
0.03194075,
0.018113315
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 4,
"total_tokens": 4
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,96 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_structured_output[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:structured_output]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant. Michael Jordan was born in 1963. His first name is \"Michael\", He played basketball for the Chicago Bulls for 15 seasons and was drafted in 1984"
},
{
"role": "user",
"content": "Please give me information about Michael Jordan."
}
],
"max_tokens": 4096,
"response_format": {
"type": "json_schema",
"json_schema": {
"name": "AnswerFormat",
"schema": {
"properties": {
"first_name": {
"title": "First Name",
"type": "string"
},
"last_name": {
"title": "Last Name",
"type": "string"
},
"year_of_birth": {
"title": "Year Of Birth",
"type": "integer"
}
},
"required": [
"first_name",
"last_name",
"year_of_birth"
],
"title": "AnswerFormat",
"type": "object"
}
}
},
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-df353403c7fb",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "{\"first_name\": \"Michael\", \"last_name\": \"Jordan\", \"year_of_birth\": 1963}",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 28,
"prompt_tokens": 66,
"total_tokens": 94,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
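
This recording covers structured output: the request pins a JSON Schema via `response_format`, and the model returns a conforming JSON string in `message.content`. A sketch of the same call (schema and messages abridged from the recording; client setup assumed as before):

```python
import json

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

answer_schema = {
    "title": "AnswerFormat",
    "type": "object",
    "properties": {
        "first_name": {"title": "First Name", "type": "string"},
        "last_name": {"title": "Last Name", "type": "string"},
        "year_of_birth": {"title": "Year Of Birth", "type": "integer"},
    },
    "required": ["first_name", "last_name", "year_of_birth"],
}

resp = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[
        {"role": "system", "content": "You are a helpful assistant. Michael Jordan was born in 1963. ..."},
        {"role": "user", "content": "Please give me information about Michael Jordan."},
    ],
    max_tokens=4096,
    response_format={
        "type": "json_schema",
        "json_schema": {"name": "AnswerFormat", "schema": answer_schema},
    },
    stream=False,
)
# Recorded content: {"first_name": "Michael", "last_name": "Jordan", "year_of_birth": 1963}
print(json.loads(resp.choices[0].message.content))
```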

View file

@ -1,54 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"prompt": "I am feeling really sad today.",
"stream": false,
"extra_body": {
"guided_choices": [
"joy",
"sadness"
]
}
},
"endpoint": "/v1/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "rec-e3727f6c749a",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": " I feel that I am not good enough, and I feel like I have no",
"stop_reason": null,
"prompt_logprobs": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 16,
"prompt_tokens": 7,
"total_tokens": 23,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"service_tier": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
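
This deleted recording is the "before" state of the guided-choice test: the request carries `extra_body.guided_choices`, yet the stored completion is still free-running text cut off at 16 tokens. Elsewhere in this diff, a retained recording of what appears to be the same test is updated from "sadness" to "joy", i.e. properly constrained output. A sketch of the request as recorded (the `guided_choices` field name is taken verbatim from the recording; whether the backend honors it is exactly what the test probes):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

resp = client.completions.create(
    model="Qwen/Qwen3-0.6B",
    prompt="I am feeling really sad today.",
    stream=False,
    # extra_body merges these keys into the request JSON, carrying
    # server-specific extensions the openai client doesn't model.
    extra_body={"guided_choices": ["joy", "sadness"]},
)
print(resp.choices[0].text)  # constrained run: "joy" or "sadness"
```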

View file

@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "This is completely different content",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.050927628,
0.038399037,
-0.05559374,
-0.105984606,
0.06944504,
-0.08054001,
-0.025946686,
-0.045175657,
0.068730615,
0.016510814,
-0.0011700827,
0.023414683,
-0.0034143464,
0.06804153,
-0.021997927,
-0.014162646,
0.12356902,
-0.06536738,
-0.082627006,
0.04300477,
-0.039514318,
0.055434275,
-0.008866895,
0.020934915,
0.016280092,
0.09630312,
-0.022835929,
0.09175565,
0.06409549,
-0.06226981,
0.010888244,
0.07833004,
0.08844764,
-0.008459277,
-0.07542651,
0.04800223,
0.0042286967,
0.037884884,
0.0023502677,
0.032233667,
0.0047689923,
-0.070404515,
-0.06513966,
0.061046362,
0.021522248,
0.10113185,
-0.07537441,
-0.04074795,
-0.0055522234,
-0.0037093374,
-0.021283673,
-0.018193243,
-0.03323253,
-0.015658593,
0.0032862085,
0.037399907,
-0.021028537,
0.052572608,
0.10211333,
-0.018634265,
0.03612266,
0.08958185,
0.050681055,
0.019839589,
0.10220134,
-0.059074707,
-0.045562137,
-0.024107283,
-0.059917513,
-0.09795064,
-0.002078402,
0.032211803,
0.04863422,
0.08062527,
0.022614514,
0.0005379622,
-0.0015465368,
0.010018953,
-0.089729026,
0.023838207,
-0.015227461,
-0.020540234,
0.08525423,
-0.08025672,
-0.002200058,
0.0649954,
-0.023069935,
-0.06201302,
-0.06545048,
-0.029986514,
0.0045501734,
0.09718718,
0.09153336,
-0.0059684636,
-0.048185453,
-0.011855243,
-0.03170323,
-0.010363732,
0.029717747,
0.103405535,
-0.029072085,
0.005597891,
-0.03075466,
-0.011073092,
-0.038647823,
-0.01590583,
0.0008562756,
0.03479237,
0.0039463183,
-0.020063022,
-0.048164852,
0.026510539,
-0.061183933,
-0.046969693,
0.02144617,
-0.048452575,
0.02205527,
0.015723849,
0.056344535,
0.055321235,
0.037136998,
-0.08872732,
0.011813868,
0.0064246035,
-0.020590257,
-0.059401207,
0.012338125,
-2.4301395e-33,
0.068363585,
-0.05303797,
0.011494271,
0.06953355,
0.013304427,
0.0020351785,
-0.020783585,
0.028951883,
0.034663863,
-0.03274387,
0.00095708756,
0.008672852,
0.007618213,
-0.024579093,
0.030253874,
-0.034167152,
-0.0315152,
0.1105276,
0.03499844,
0.045135163,
0.00044455956,
0.051429555,
0.015050582,
-0.009024664,
0.023132037,
0.05141033,
-0.00417506,
0.004720958,
-0.016197585,
-0.025692327,
-0.024077175,
-0.00953031,
0.05060433,
-0.058328744,
0.04903431,
0.07964924,
0.03599398,
-0.065374464,
-0.035382472,
-0.07028972,
-0.009750123,
-0.031909473,
-0.04101604,
-0.041144423,
-0.036323845,
0.06685511,
0.016679594,
-0.048498012,
-0.015474575,
-0.00048608257,
0.03267068,
-0.010890426,
0.016646467,
-0.057286758,
0.008073807,
0.008808943,
-0.061580453,
-0.010815387,
0.0717443,
0.08607838,
0.014073375,
0.014896061,
-0.098295614,
-0.046653833,
0.033601493,
0.0647405,
-0.007525925,
0.025440095,
0.04171436,
-0.033113986,
-0.014553822,
0.024878975,
0.045614205,
-0.042929318,
-0.040504646,
-0.06304663,
-0.022389242,
0.010583584,
-0.032525852,
-0.03146621,
0.0081922775,
0.021094568,
0.0095269885,
-0.08290188,
-0.021351986,
0.008777032,
0.060185786,
-0.062182017,
0.004518251,
0.05684528,
-0.013033095,
0.01867297,
-0.008998785,
-0.076766245,
0.051622886,
1.6926977e-33,
-0.12588808,
0.011676749,
-0.079886116,
0.02304184,
0.029238446,
0.08721121,
0.06906221,
0.032533444,
0.047794122,
0.13212898,
0.03129717,
-0.0125368,
0.0035920327,
-0.016413208,
-0.038557872,
0.016005918,
0.09166447,
0.047558285,
-0.054981478,
0.06797876,
0.017968502,
0.118666455,
-0.069318265,
0.043814093,
0.04150938,
-0.017812226,
0.051738504,
0.06795029,
0.080493495,
0.005386888,
0.08878265,
-0.036075104,
-0.07708273,
-0.09101018,
-0.09597232,
-0.0937606,
-0.06200779,
0.06722552,
-0.0006647803,
0.029067127,
0.08179574,
-0.06488274,
-0.050375167,
-0.002403243,
-0.026110265,
-0.007630271,
0.011972527,
-0.08573929,
0.04107404,
0.024723932,
-0.02222756,
-0.11560156,
0.006753066,
-0.04589066,
-0.06369223,
0.053635046,
0.005769477,
0.06325056,
0.0048679966,
-0.057087842,
0.041931894,
0.022344982,
-0.14709935,
0.026361033,
0.106274396,
-0.0059068515,
0.020035667,
0.034950804,
-0.03342695,
-0.03884034,
-0.076072656,
-0.11173452,
-0.038953967,
-0.10270519,
0.04714134,
-0.049391687,
0.074747935,
0.041724026,
-0.031083144,
0.0033830043,
0.055804495,
-0.031882074,
-0.02541756,
0.050101582,
0.035991114,
0.09143438,
-0.07581111,
-0.050589707,
0.0074097887,
-0.0014020415,
-0.05036443,
-0.0015289022,
0.005471816,
0.07689256,
0.014164922,
-1.8297508e-08,
0.029913928,
-0.057959806,
-0.06846765,
0.026196472,
-0.0035178436,
0.11374637,
0.056845777,
-0.09315407,
0.0027757618,
0.10895455,
-0.033027817,
0.005051668,
-0.043633904,
-0.048978273,
0.011912417,
0.059747256,
-0.08661686,
-0.052748058,
0.026321623,
0.042173225,
-0.0035451513,
0.03797019,
0.022595786,
-0.0614702,
0.01268269,
0.040893063,
-0.084825225,
0.041167296,
-0.038163006,
0.008364558,
0.01014753,
0.024994388,
-0.012504467,
-0.045078665,
0.0102669485,
-0.046302866,
0.061438397,
0.016235871,
-0.0011558776,
0.007455159,
-0.019448454,
-0.06798961,
0.05472832,
0.09646006,
-0.04711737,
0.060088705,
0.0030213061,
-0.08877283,
0.037262574,
-0.009947699,
0.0035697597,
-0.07833652,
0.02169359,
-0.013075168,
0.072521746,
-0.0649658,
-0.029920656,
-0.017777385,
0.033904497,
0.02919506,
0.08793891,
0.008437021,
0.064442866,
-0.01656208
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,92 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestChatCompletionWithTools::test_simple_tool_call[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "What's the weather in San Francisco?"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "City name"
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-e89112e7735f",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user is asking for the weather in San Francisco. I need to check if there's a function available for that. Looking at the tools provided, there's a function called get_weather that requires a location parameter. The description says it gets weather for a location, and the parameter is the city name. The user provided \"San Francisco\" as the location, so I should call the get_weather function with \"San Francisco\" as the argument. I don't see any other parameters needed here, so the tool call should be straightforward. Just make sure the city name is correctly formatted in JSON.\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-feead29842dc40b2831c41ed397f555f",
"function": {
"arguments": "{\"location\": \"San Francisco\"}",
"name": "get_weather"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 146,
"prompt_tokens": 161,
"total_tokens": 307,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
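
Here the model does call the tool: the server returns a structured `tool_calls` entry alongside the `<think>` preamble left in `content`, and `finish_reason` flips to `"tool_calls"`. A sketch of consuming such a response (request fields from the recording; the argument-decoding step is the part worth copying, since arguments arrive as a JSON string rather than a dict):

```python
import json

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

resp = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[{"role": "user", "content": "What's the weather in San Francisco?"}],
    max_tokens=4096,
    tools=[{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather for a location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string", "description": "City name"}},
                "required": ["location"],
            },
        },
    }],
)

for call in resp.choices[0].message.tool_calls or []:
    # Arguments are a JSON string, e.g. '{"location": "San Francisco"}'.
    args = json.loads(call.function.arguments)
    print(call.function.name, args)
```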

View file

@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:05.079436-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@ -28,7 +28,7 @@
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "sadness",
"text": "joy",
"stop_reason": null,
"prompt_logprobs": null
}
@ -38,13 +38,12 @@
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 3,
"completion_tokens": 2,
"prompt_tokens": 7,
"total_tokens": 10,
"total_tokens": 9,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"service_tier": null,
"kv_transfer_params": null
}
},

View file

@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:10.779723-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:05.489695-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:11.101611-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375180,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-aeeb49e5e51c42fa94562780165bd620",
"object": "model_permission",
"created": 1762375180,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375115,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-feec0a894be04f738e12b596ff163b64",
"object": "model_permission",
"created": 1762375115,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_stop_sequence[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:stop_sequence]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374330,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-119e17052e4c4c13bd791af3138d5360",
"object": "model_permission",
"created": 1762374330,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375226,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-c6ae673fda084519b3c67947896cd3b0",
"object": "model_permission",
"created": 1762375226,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374573,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-3f422354a81e491b87f93d5b192a0e1a",
"object": "model_permission",
"created": 1762374573,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374305,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-794e16e59ddb4216a8bedfdf485b8f24",
"object": "model_permission",
"created": 1762374305,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=vllm/Qwen/Qwen3-0.6B-True]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374317,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-ff7d26d076eb4373a0631a80fe3ae063",
"object": "model_permission",
"created": 1762374317,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375033,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-2a16fede981b43be9e1cbe3dbedd1e74",
"object": "model_permission",
"created": 1762375033,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374297,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-4bc93704559a4e1d8492aeec7222040c",
"object": "model_permission",
"created": 1762374297,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}
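
The remaining new recordings are all the same `/v1/models` probe, fired once per test to confirm the vLLM server is up and serving the expected model. The recordings store the probe with `"method": "POST"`, while the stock client issues a GET; a sketch using the standard client call:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

for m in client.models.list().data:
    # Recorded identity: "Qwen/Qwen3-0.6B" owned by "vllm".
    print(m.id, m.owned_by)
```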

Some files were not shown because too many files have changed in this diff.