Merge remote-tracking branch 'upstream/main' into strip-telem

Charlie Doern 2025-11-06 09:47:12 -05:00
commit d00a085aed
91 changed files with 9321 additions and 544 deletions

View file

@@ -72,7 +72,8 @@ runs:
echo "New recordings detected, committing and pushing"
git add tests/integration/
-git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"
+git commit -m "Recordings update from CI (setup: ${{ inputs.setup }}, suite: ${{ inputs.suite }})"
git fetch origin ${{ github.ref_name }}
git rebase origin/${{ github.ref_name }}
echo "Rebased successfully"
@@ -88,6 +89,8 @@ runs:
run: |
# Ollama logs (if ollama container exists)
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
+# vllm logs (if vllm container exists)
+sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
# Note: distro container logs are now dumped in integration-tests.sh before container is removed
- name: Upload logs

View file

@@ -11,13 +11,14 @@ runs:
--name vllm \
-p 8000:8000 \
--privileged=true \
-quay.io/higginsd/vllm-cpu:65393ee064 \
+quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \
--host 0.0.0.0 \
--port 8000 \
--enable-auto-tool-choice \
---tool-call-parser llama3_json \
+--tool-call-parser hermes \
---model /root/.cache/Llama-3.2-1B-Instruct \
+--model /root/.cache/Qwen3-0.6B \
---served-model-name meta-llama/Llama-3.2-1B-Instruct
+--served-model-name Qwen/Qwen3-0.6B \
+--max-model-len 8192
# Wait for vllm to be ready
echo "Waiting for vllm to be ready..."

View file

@@ -27,7 +27,6 @@ on:
schedule:
# If changing the cron schedule, update the provider in the test-matrix job
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
-- cron: '1 0 * * 0' # (test vllm) Weekly on Sunday at 1 AM UTC
workflow_dispatch:
inputs:
test-all-client-versions:

View file

@@ -9976,6 +9976,70 @@ components:
- metadata
title: VectorStoreObject
description: OpenAI Vector Store object.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreRequestWithExtraBody": "OpenAICreateVectorStoreRequestWithExtraBody":
type: object type: object
properties: properties:
@ -10001,15 +10065,7 @@ components:
description: >- description: >-
(Optional) Expiration policy for the vector store (Optional) Expiration policy for the vector store
chunking_strategy: chunking_strategy:
type: object $ref: '#/components/schemas/VectorStoreChunkingStrategy'
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >- description: >-
(Optional) Strategy for splitting files into chunks (Optional) Strategy for splitting files into chunks
metadata: metadata:
@ -10085,70 +10141,6 @@ components:
- deleted - deleted
title: VectorStoreDeleteResponse title: VectorStoreDeleteResponse
description: Response from deleting a vector store. description: Response from deleting a vector store.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreFileBatchRequestWithExtraBody": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
type: object type: object
properties: properties:
@ -10606,7 +10598,9 @@ components:
description: >- description: >-
Object type identifier for the search results page Object type identifier for the search results page
search_query: search_query:
type: string type: array
items:
type: string
description: >- description: >-
The original search query that was executed The original search query that was executed
data: data:
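The schema change above replaces the free-form `chunking_strategy` object with a discriminated union keyed on `type`. For reference, request payloads that match the new union look roughly like this sketch (field values are illustrative; the 400/800 defaults come from `VectorStoreChunkingStrategyStaticConfig` above):

```python
# Sketch: example payloads for the new discriminated union. Per the schema,
# max_chunk_size_tokens must be between 100 and 4096.
auto_strategy = {"type": "auto"}

static_strategy = {
    "type": "static",
    "static": {
        "chunk_overlap_tokens": 400,   # schema default
        "max_chunk_size_tokens": 800,  # schema default
    },
}

create_vector_store_request = {
    "name": "my-vector-store",        # illustrative
    "chunking_strategy": static_strategy,
}
```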

View file

@@ -163,7 +163,41 @@ docker run \
--port $LLAMA_STACK_PORT
```
-### Via venv
+The container will run the distribution with a SQLite store by default. This store is used for the following components:
+
+- Metadata store: store metadata about the models, providers, etc.
+- Inference store: collection of responses from the inference provider
+- Agents store: store agent configurations (sessions, turns, etc.)
+- Agents Responses store: store responses from the agents
+
+However, you can use PostgreSQL instead by running the `starter::run-with-postgres-store.yaml` configuration:
+
+```bash
+docker run \
+-it \
+--pull always \
+-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+-e OPENAI_API_KEY=your_openai_key \
+-e FIREWORKS_API_KEY=your_fireworks_key \
+-e TOGETHER_API_KEY=your_together_key \
+-e POSTGRES_HOST=your_postgres_host \
+-e POSTGRES_PORT=your_postgres_port \
+-e POSTGRES_DB=your_postgres_db \
+-e POSTGRES_USER=your_postgres_user \
+-e POSTGRES_PASSWORD=your_postgres_password \
+llamastack/distribution-starter \
+starter::run-with-postgres-store.yaml
+```
+
+Postgres environment variables:
+
+- `POSTGRES_HOST`: Postgres host (default: `localhost`)
+- `POSTGRES_PORT`: Postgres port (default: `5432`)
+- `POSTGRES_DB`: Postgres database name (default: `llamastack`)
+- `POSTGRES_USER`: Postgres username (default: `llamastack`)
+- `POSTGRES_PASSWORD`: Postgres password (default: `llamastack`)
+
+### Via Conda or venv
Ensure you have configured the starter distribution using the environment variables explained above.
@@ -171,8 +205,11 @@ Ensure you have configured the starter distribution using the environment variab
# Install dependencies for the starter distribution
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
-# Run the server
+# Run the server (with SQLite - default)
uv run --with llama-stack llama stack run starter
+# Or run with PostgreSQL
+uv run --with llama-stack llama stack run starter::run-with-postgres-store.yaml
```
## Example Usage
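Whichever store backs the server, the HTTP surface is unchanged, so a quick smoke test works the same for SQLite and PostgreSQL. A minimal sketch, assuming the server is reachable on the default port 8321 and that the model listing follows the stack's usual `data`/`identifier` response shape:

```python
# Sketch: smoke-test a running starter distribution via its models listing.
import requests

resp = requests.get("http://localhost:8321/v1/models", timeout=10)
resp.raise_for_status()
for model in resp.json()["data"]:
    print(model["identifier"])
```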

View file

@@ -16,7 +16,7 @@ Passthrough inference provider for connecting to any external inference service not directly supported
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint |
+| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider |
| `url` | `<class 'str'>` | No | | The URL for the passthrough endpoint |

## Sample Configuration

View file

@@ -9260,6 +9260,70 @@ components:
- metadata
title: VectorStoreObject
description: OpenAI Vector Store object.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreRequestWithExtraBody": "OpenAICreateVectorStoreRequestWithExtraBody":
type: object type: object
properties: properties:
@ -9285,15 +9349,7 @@ components:
description: >- description: >-
(Optional) Expiration policy for the vector store (Optional) Expiration policy for the vector store
chunking_strategy: chunking_strategy:
type: object $ref: '#/components/schemas/VectorStoreChunkingStrategy'
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >- description: >-
(Optional) Strategy for splitting files into chunks (Optional) Strategy for splitting files into chunks
metadata: metadata:
@ -9369,70 +9425,6 @@ components:
- deleted - deleted
title: VectorStoreDeleteResponse title: VectorStoreDeleteResponse
description: Response from deleting a vector store. description: Response from deleting a vector store.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreFileBatchRequestWithExtraBody": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
type: object type: object
properties: properties:
@ -9890,7 +9882,9 @@ components:
description: >- description: >-
Object type identifier for the search results page Object type identifier for the search results page
search_query: search_query:
type: string type: array
items:
type: string
description: >- description: >-
The original search query that was executed The original search query that was executed
data: data:

View file

@@ -9976,6 +9976,70 @@ components:
- metadata
title: VectorStoreObject
description: OpenAI Vector Store object.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreRequestWithExtraBody": "OpenAICreateVectorStoreRequestWithExtraBody":
type: object type: object
properties: properties:
@ -10001,15 +10065,7 @@ components:
description: >- description: >-
(Optional) Expiration policy for the vector store (Optional) Expiration policy for the vector store
chunking_strategy: chunking_strategy:
type: object $ref: '#/components/schemas/VectorStoreChunkingStrategy'
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >- description: >-
(Optional) Strategy for splitting files into chunks (Optional) Strategy for splitting files into chunks
metadata: metadata:
@ -10085,70 +10141,6 @@ components:
- deleted - deleted
title: VectorStoreDeleteResponse title: VectorStoreDeleteResponse
description: Response from deleting a vector store. description: Response from deleting a vector store.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreFileBatchRequestWithExtraBody": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
type: object type: object
properties: properties:
@ -10606,7 +10598,9 @@ components:
description: >- description: >-
Object type identifier for the search results page Object type identifier for the search results page
search_query: search_query:
type: string type: array
items:
type: string
description: >- description: >-
The original search query that was executed The original search query that was executed
data: data:

View file

@@ -405,11 +405,6 @@ fi
echo "=== Running Integration Tests ==="
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
-# Additional exclusions for vllm setup
-if [[ "$TEST_SETUP" == "vllm" ]]; then
-EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
-fi
PYTEST_PATTERN="not( $EXCLUDE_TESTS )"
if [[ -n "$TEST_PATTERN" ]]; then
PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN"

View file

@@ -260,7 +260,7 @@ class VectorStoreSearchResponsePage(BaseModel):
"""
object: str = "vector_store.search_results.page"
-search_query: str
+search_query: list[str]
data: list[VectorStoreSearchResponse]
has_more: bool = False
next_page: str | None = None
@@ -478,7 +478,7 @@ class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"):
name: str | None = None
file_ids: list[str] | None = None
expires_after: dict[str, Any] | None = None
-chunking_strategy: dict[str, Any] | None = None
+chunking_strategy: VectorStoreChunkingStrategy | None = None
metadata: dict[str, Any] | None = None

View file

@@ -20,6 +20,8 @@ from llama_stack.apis.vector_io import (
SearchRankingOptions,
VectorIO,
VectorStoreChunkingStrategy,
+VectorStoreChunkingStrategyStatic,
+VectorStoreChunkingStrategyStaticConfig,
VectorStoreDeleteResponse,
VectorStoreFileBatchObject,
VectorStoreFileContentsResponse,
@@ -167,6 +169,13 @@ class VectorIORouter(VectorIO):
if embedding_dimension is not None:
params.model_extra["embedding_dimension"] = embedding_dimension
+# Set chunking strategy explicitly if not provided
+if params.chunking_strategy is None or params.chunking_strategy.type == "auto":
+# actualize the chunking strategy to static
+params.chunking_strategy = VectorStoreChunkingStrategyStatic(
+static=VectorStoreChunkingStrategyStaticConfig()
+)
return await provider.openai_create_vector_store(params)
async def openai_list_vector_stores(
@@ -283,6 +292,8 @@ class VectorIORouter(VectorIO):
chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}")
+if chunking_strategy is None or chunking_strategy.type == "auto":
+chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig())
provider = await self.routing_table.get_provider_impl(vector_store_id)
return await provider.openai_attach_file_to_vector_store(
vector_store_id=vector_store_id,
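The router change "actualizes" a missing or `auto` chunking strategy into an explicit static one before dispatch, so providers always receive concrete parameters. In isolation the rule reduces to the following sketch (plain dataclasses stand in for the project's pydantic models; the 400/800 defaults mirror `VectorStoreChunkingStrategyStaticConfig`):

```python
# Sketch of the defaulting rule in isolation, using hypothetical stand-in types.
from dataclasses import dataclass, field

@dataclass
class StaticConfig:
    chunk_overlap_tokens: int = 400   # schema default
    max_chunk_size_tokens: int = 800  # schema default

@dataclass
class StaticStrategy:
    type: str = "static"
    static: StaticConfig = field(default_factory=StaticConfig)

def actualize(strategy):
    # None or "auto" collapses to an explicit static strategy with defaults.
    if strategy is None or getattr(strategy, "type", None) == "auto":
        return StaticStrategy()
    return strategy

print(actualize(None))  # StaticStrategy(type='static', static=StaticConfig(400, 800))
```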

View file

@@ -52,7 +52,17 @@ def resolve_config_or_distro(
logger.debug(f"Using distribution: {distro_config}")
return distro_config
-# Strategy 3: Try as built distribution name
+# Strategy 3: Try as distro config path (if no .yaml extension and contains a slash)
+# eg: starter::run-with-postgres-store.yaml
+# Use :: to avoid slash and confusion with a filesystem path
+if "::" in config_or_distro:
+distro_name, config_name = config_or_distro.split("::")
+distro_config = _get_distro_config_path(distro_name, config_name)
+if distro_config.exists():
+logger.info(f"Using distribution: {distro_config}")
+return distro_config
+# Strategy 4: Try as built distribution name
distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
if distrib_config.exists():
logger.debug(f"Using built distribution: {distrib_config}")
@@ -63,13 +73,15 @@ def resolve_config_or_distro(
logger.debug(f"Using built distribution: {distrib_config}")
return distrib_config
-# Strategy 4: Failed - provide helpful error
+# Strategy 5: Failed - provide helpful error
raise ValueError(_format_resolution_error(config_or_distro, mode))
-def _get_distro_config_path(distro_name: str, mode: Mode) -> Path:
+def _get_distro_config_path(distro_name: str, mode: str) -> Path:
"""Get the config file path for a distro."""
-return DISTRO_DIR / distro_name / f"{mode}.yaml"
+if not mode.endswith(".yaml"):
+mode = f"{mode}.yaml"
+return DISTRO_DIR / distro_name / mode
def _format_resolution_error(config_or_distro: str, mode: Mode) -> str:
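The new strategy lets one argument name both a distro and a specific run config, e.g. `starter::run-with-postgres-store.yaml`. Stripped of logging and the surrounding strategies, the parsing reduces to this sketch (the base directory here is illustrative, not the project's actual constant):

```python
# Sketch: how a "distro::config" argument splits (filesystem checks omitted).
from pathlib import Path

DISTRO_DIR = Path("~/.llama/distributions").expanduser()  # illustrative base dir

def distro_config_path(distro_name: str, mode: str) -> Path:
    if not mode.endswith(".yaml"):
        mode = f"{mode}.yaml"
    return DISTRO_DIR / distro_name / mode

arg = "starter::run-with-postgres-store.yaml"
if "::" in arg:
    distro_name, config_name = arg.split("::")
    print(distro_config_path(distro_name, config_name))
```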

View file

@@ -84,6 +84,15 @@ def run_command(command: list[str]) -> int:
text=True,
check=False,
)
+# Print stdout and stderr if command failed
+if result.returncode != 0:
+log.error(f"Command {' '.join(command)} failed with returncode {result.returncode}")
+if result.stdout:
+log.error(f"STDOUT: {result.stdout}")
+if result.stderr:
+log.error(f"STDERR: {result.stderr}")
return result.returncode
except subprocess.SubprocessError as e:
log.error(f"Subprocess error: {e}")

View file

@@ -56,4 +56,5 @@ image_type: venv
additional_pip_packages:
- aiosqlite
- asyncpg
+- psycopg2-binary
- sqlalchemy[asyncio]

View file

@@ -13,5 +13,6 @@ from ..starter.starter import get_distribution_template as get_starter_distribut
def get_distribution_template() -> DistributionTemplate:
template = get_starter_distribution_template(name="ci-tests")
template.description = "CI tests for Llama Stack"
+template.run_configs.pop("run-with-postgres-store.yaml", None)
return template

View file

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .postgres_demo import get_distribution_template # noqa: F401

View file

@@ -1,23 +0,0 @@
version: 2
distribution_spec:
description: Quick start template for running Llama Stack with several popular providers
providers:
inference:
- provider_type: remote::vllm
- provider_type: inline::sentence-transformers
vector_io:
- provider_type: remote::chromadb
safety:
- provider_type: inline::llama-guard
agents:
- provider_type: inline::meta-reference
tool_runtime:
- provider_type: remote::brave-search
- provider_type: remote::tavily-search
- provider_type: inline::rag-runtime
- provider_type: remote::model-context-protocol
image_type: venv
additional_pip_packages:
- asyncpg
- psycopg2-binary
- sqlalchemy[asyncio]

View file

@@ -1,125 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.models import ModelType
from llama_stack.core.datatypes import (
BuildProvider,
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.distributions.template import (
DistributionTemplate,
RunConfigSettings,
)
from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
def get_distribution_template() -> DistributionTemplate:
inference_providers = [
Provider(
provider_id="vllm-inference",
provider_type="remote::vllm",
config=VLLMInferenceAdapterConfig.sample_run_config(
url="${env.VLLM_URL:=http://localhost:8000/v1}",
),
),
]
providers = {
"inference": [
BuildProvider(provider_type="remote::vllm"),
BuildProvider(provider_type="inline::sentence-transformers"),
],
"vector_io": [BuildProvider(provider_type="remote::chromadb")],
"safety": [BuildProvider(provider_type="inline::llama-guard")],
"agents": [BuildProvider(provider_type="inline::meta-reference")],
"tool_runtime": [
BuildProvider(provider_type="remote::brave-search"),
BuildProvider(provider_type="remote::tavily-search"),
BuildProvider(provider_type="inline::rag-runtime"),
BuildProvider(provider_type="remote::model-context-protocol"),
],
}
name = "postgres-demo"
vector_io_providers = [
Provider(
provider_id="${env.ENABLE_CHROMADB:+chromadb}",
provider_type="remote::chromadb",
config=ChromaVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
url="${env.CHROMADB_URL:=}",
),
),
]
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::rag",
provider_id="rag-runtime",
),
]
default_models = [
ModelInput(
model_id="${env.INFERENCE_MODEL}",
provider_id="vllm-inference",
)
]
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)
embedding_model = ModelInput(
model_id="nomic-embed-text-v1.5",
provider_id=embedding_provider.provider_id,
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 768,
},
)
return DistributionTemplate(
name=name,
distro_type="self_hosted",
description="Quick start template for running Llama Stack with several popular providers",
container_image=None,
template_path=None,
providers=providers,
available_models_by_provider={},
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": inference_providers + [embedding_provider],
"vector_io": vector_io_providers,
},
default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
storage_backends={
"kv_default": PostgresKVStoreConfig.sample_run_config(
table_name="llamastack_kvstore",
),
"sql_default": PostgresSqlStoreConfig.sample_run_config(),
},
),
},
run_config_env_vars={
"LLAMA_STACK_PORT": (
"8321",
"Port for the Llama Stack distribution server",
),
},
)

View file

@@ -57,4 +57,5 @@ image_type: venv
additional_pip_packages:
- aiosqlite
- asyncpg
+- psycopg2-binary
- sqlalchemy[asyncio]

View file

@@ -0,0 +1,281 @@
version: 2
image_name: starter-gpu
apis:
- agents
- batches
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras
config:
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:=http://localhost:11434}
- provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi
config:
url: ${env.TGI_URL:=}
- provider_id: fireworks
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock
provider_type: remote::bedrock
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai
provider_type: remote::openai
config:
api_key: ${env.OPENAI_API_KEY:=}
base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
- provider_id: anthropic
provider_type: remote::anthropic
config:
api_key: ${env.ANTHROPIC_API_KEY:=}
- provider_id: gemini
provider_type: remote::gemini
config:
api_key: ${env.GEMINI_API_KEY:=}
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
provider_type: remote::vertexai
config:
project: ${env.VERTEX_AI_PROJECT:=}
location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova
provider_type: remote::sambanova
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure
config:
api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
persistence:
namespace: vector_io::faiss
backend: kv_default
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
persistence:
namespace: vector_io::sqlite_vec
backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
persistence:
namespace: vector_io::milvus
backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
persistence:
namespace: vector_io::pgvector
backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
metadata_store:
table_name: files_metadata
backend: sql_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
- provider_id: code-scanner
provider_type: inline::code-scanner
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
responses_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
post_training:
- provider_id: huggingface-gpu
provider_type: inline::huggingface-gpu
config:
checkpoint_format: huggingface
distributed_backend: null
device: cpu
dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
batches:
- provider_id: reference
provider_type: inline::reference
config:
kvstore:
namespace: batches
backend: kv_postgres
storage:
backends:
kv_postgres:
type: kv_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
sql_postgres:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
stores:
metadata:
namespace: registry
backend: kv_postgres
inference:
table_name: inference_store
backend: sql_postgres
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_postgres
prompts:
namespace: prompts
backend: kv_postgres
registered_resources:
models: []
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups: []
server:
port: 8321
telemetry:
enabled: true

View file

@@ -57,4 +57,5 @@ image_type: venv
additional_pip_packages:
- aiosqlite
- asyncpg
+- psycopg2-binary
- sqlalchemy[asyncio]

View file

@@ -0,0 +1,278 @@
version: 2
image_name: starter
apis:
- agents
- batches
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras
config:
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:=http://localhost:11434}
- provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi
config:
url: ${env.TGI_URL:=}
- provider_id: fireworks
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock
provider_type: remote::bedrock
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai
provider_type: remote::openai
config:
api_key: ${env.OPENAI_API_KEY:=}
base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
- provider_id: anthropic
provider_type: remote::anthropic
config:
api_key: ${env.ANTHROPIC_API_KEY:=}
- provider_id: gemini
provider_type: remote::gemini
config:
api_key: ${env.GEMINI_API_KEY:=}
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
provider_type: remote::vertexai
config:
project: ${env.VERTEX_AI_PROJECT:=}
location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova
provider_type: remote::sambanova
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure
config:
api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
persistence:
namespace: vector_io::faiss
backend: kv_default
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
persistence:
namespace: vector_io::sqlite_vec
backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
persistence:
namespace: vector_io::milvus
backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
persistence:
namespace: vector_io::pgvector
backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
metadata_store:
table_name: files_metadata
backend: sql_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
- provider_id: code-scanner
provider_type: inline::code-scanner
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
responses_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
post_training:
- provider_id: torchtune-cpu
provider_type: inline::torchtune-cpu
config:
checkpoint_format: meta
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
batches:
- provider_id: reference
provider_type: inline::reference
config:
kvstore:
namespace: batches
backend: kv_postgres
storage:
backends:
kv_postgres:
type: kv_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
sql_postgres:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
stores:
metadata:
namespace: registry
backend: kv_postgres
inference:
table_name: inference_store
backend: sql_postgres
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_postgres
prompts:
namespace: prompts
backend: kv_postgres
registered_resources:
models: []
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups: []
server:
port: 8321
telemetry:
enabled: true
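Both new run configs lean on two substitution forms: `${env.VAR:=default}` falls back to a default value, while `${env.VAR:+value}` expands to `value` only when the variable is set (which is how optional providers such as vllm or chromadb get enabled). A rough Python mimic of the assumed semantics, for intuition only (this is not the project's actual resolver):

```python
# Sketch: the two substitution forms used throughout this config, mimicked in
# plain Python (assumed semantics, mirroring bash ${VAR:=default} / ${VAR:+alt}).
import os
import re

def substitute(template: str) -> str:
    def repl(match: re.Match) -> str:
        var, op, value = match.group(1), match.group(2), match.group(3)
        current = os.environ.get(var, "")
        if op == ":=":  # use the env var, falling back to a default
            return current or value
        if op == ":+":  # use `value` only when the env var is set, else empty
            return value if current else ""
        return current
    return re.sub(r"\$\{env\.(\w+)(:=|:\+)([^}]*)\}", repl, template)

os.environ["POSTGRES_HOST"] = "db.internal"
print(substitute("host: ${env.POSTGRES_HOST:=localhost}"))  # host: db.internal
print(substitute("provider_id: ${env.VLLM_URL:+vllm}"))     # empty if VLLM_URL unset
```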

View file

@@ -17,6 +17,11 @@ from llama_stack.core.datatypes import (
ToolGroupInput,
VectorStoresConfig,
)
+from llama_stack.core.storage.datatypes import (
+InferenceStoreReference,
+KVStoreReference,
+SqlStoreReference,
+)
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
from llama_stack.providers.datatypes import RemoteProviderSpec
@@ -36,6 +41,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
)
from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
+from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
@@ -181,6 +187,62 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
provider_shield_id="${env.CODE_SCANNER_MODEL:=}",
),
]
postgres_config = PostgresSqlStoreConfig.sample_run_config()
default_overrides = {
"inference": remote_inference_providers + [embedding_provider],
"vector_io": [
Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="sqlite-vec",
provider_type="inline::sqlite-vec",
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="${env.MILVUS_URL:+milvus}",
provider_type="inline::milvus",
config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="${env.CHROMADB_URL:+chromadb}",
provider_type="remote::chromadb",
config=ChromaVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}/",
url="${env.CHROMADB_URL:=}",
),
),
Provider(
provider_id="${env.PGVECTOR_DB:+pgvector}",
provider_type="remote::pgvector",
config=PGVectorVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
db="${env.PGVECTOR_DB:=}",
user="${env.PGVECTOR_USER:=}",
password="${env.PGVECTOR_PASSWORD:=}",
),
),
Provider(
provider_id="${env.QDRANT_URL:+qdrant}",
provider_type="remote::qdrant",
config=QdrantVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
url="${env.QDRANT_URL:=}",
),
),
Provider(
provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
provider_type="remote::weaviate",
config=WeaviateVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
),
),
],
"files": [files_provider],
}
return DistributionTemplate(
name=name,
@@ -189,64 +251,10 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
container_image=None,
template_path=None,
providers=providers,
-additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
+additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())),
run_configs={
"run.yaml": RunConfigSettings(
-provider_overrides={
+provider_overrides=default_overrides,
"inference": remote_inference_providers + [embedding_provider],
"vector_io": [
Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="sqlite-vec",
provider_type="inline::sqlite-vec",
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="${env.MILVUS_URL:+milvus}",
provider_type="inline::milvus",
config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="${env.CHROMADB_URL:+chromadb}",
provider_type="remote::chromadb",
config=ChromaVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}/",
url="${env.CHROMADB_URL:=}",
),
),
Provider(
provider_id="${env.PGVECTOR_DB:+pgvector}",
provider_type="remote::pgvector",
config=PGVectorVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
db="${env.PGVECTOR_DB:=}",
user="${env.PGVECTOR_USER:=}",
password="${env.PGVECTOR_PASSWORD:=}",
),
),
Provider(
provider_id="${env.QDRANT_URL:+qdrant}",
provider_type="remote::qdrant",
config=QdrantVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
url="${env.QDRANT_URL:=}",
),
),
Provider(
provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
provider_type="remote::weaviate",
config=WeaviateVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
),
),
],
"files": [files_provider],
},
default_models=[],
default_tool_groups=default_tool_groups,
default_shields=default_shields,
@@ -261,6 +269,55 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
default_shield_id="llama-guard",
),
),
"run-with-postgres-store.yaml": RunConfigSettings(
provider_overrides={
**default_overrides,
"agents": [
Provider(
provider_id="meta-reference",
provider_type="inline::meta-reference",
config=dict(
persistence_store=postgres_config,
responses_store=postgres_config,
),
)
],
"batches": [
Provider(
provider_id="reference",
provider_type="inline::reference",
config=dict(
kvstore=KVStoreReference(
backend="kv_postgres",
namespace="batches",
).model_dump(exclude_none=True),
),
)
],
},
storage_backends={
"kv_postgres": PostgresKVStoreConfig.sample_run_config(),
"sql_postgres": postgres_config,
},
storage_stores={
"metadata": KVStoreReference(
backend="kv_postgres",
namespace="registry",
).model_dump(exclude_none=True),
"inference": InferenceStoreReference(
backend="sql_postgres",
table_name="inference_store",
).model_dump(exclude_none=True),
"conversations": SqlStoreReference(
backend="sql_postgres",
table_name="openai_conversations",
).model_dump(exclude_none=True),
"prompts": KVStoreReference(
backend="kv_postgres",
namespace="prompts",
).model_dump(exclude_none=True),
},
),
},
run_config_env_vars={
"LLAMA_STACK_PORT": (

View file

@@ -10,8 +10,8 @@ from .config import PassthroughImplConfig
class PassthroughProviderDataValidator(BaseModel):
-url: str
-api_key: str
+passthrough_url: str
+passthrough_api_key: str
async def get_adapter_impl(config: PassthroughImplConfig, _deps):

View file

@@ -6,7 +6,7 @@
from typing import Any
-from pydantic import Field, SecretStr
+from pydantic import Field
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.schema_utils import json_schema_type
@@ -19,11 +19,6 @@ class PassthroughImplConfig(RemoteInferenceProviderConfig):
description="The URL for the passthrough endpoint",
)
-api_key: SecretStr | None = Field(
-default=None,
-description="API Key for the passthrouth endpoint",
-)
@classmethod
def sample_run_config(
cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs

View file

@@ -5,9 +5,8 @@
 # the root directory of this source tree.
 from collections.abc import AsyncIterator
-from typing import Any
-from llama_stack_client import AsyncLlamaStackClient
+from openai import AsyncOpenAI
 from llama_stack.apis.inference import (
     Inference,
@@ -20,103 +19,117 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.apis.models import Model
-from llama_stack.core.library_client import convert_pydantic_to_json_value
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
+from llama_stack.core.request_headers import NeedsRequestProviderData
 from .config import PassthroughImplConfig
-class PassthroughInferenceAdapter(Inference):
+class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference):
     def __init__(self, config: PassthroughImplConfig) -> None:
-        ModelRegistryHelper.__init__(self)
         self.config = config
-    async def initialize(self) -> None:
-        pass
-    async def shutdown(self) -> None:
-        pass
     async def unregister_model(self, model_id: str) -> None:
         pass
     async def register_model(self, model: Model) -> Model:
         return model
-    def _get_client(self) -> AsyncLlamaStackClient:
-        passthrough_url = None
-        passthrough_api_key = None
-        provider_data = None
-        if self.config.url is not None:
-            passthrough_url = self.config.url
-        else:
-            provider_data = self.get_request_provider_data()
-            if provider_data is None or not provider_data.passthrough_url:
-                raise ValueError(
-                    'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": <your passthrough url>}'
-                )
-            passthrough_url = provider_data.passthrough_url
-        if self.config.api_key is not None:
-            passthrough_api_key = self.config.api_key.get_secret_value()
-        else:
-            provider_data = self.get_request_provider_data()
-            if provider_data is None or not provider_data.passthrough_api_key:
-                raise ValueError(
-                    'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": <your api key>}'
-                )
-            passthrough_api_key = provider_data.passthrough_api_key
-        return AsyncLlamaStackClient(
-            base_url=passthrough_url,
-            api_key=passthrough_api_key,
-            provider_data=provider_data,
+    async def list_models(self) -> list[Model]:
+        """List models by calling the downstream /v1/models endpoint."""
+        client = self._get_openai_client()
+        response = await client.models.list()
+        # Convert from OpenAI format to Llama Stack Model format
+        models = []
+        for model_data in response.data:
+            downstream_model_id = model_data.id
+            custom_metadata = getattr(model_data, "custom_metadata", {}) or {}
+            # Prefix identifier with provider ID for local registry
+            local_identifier = f"{self.__provider_id__}/{downstream_model_id}"
+            model = Model(
+                identifier=local_identifier,
+                provider_id=self.__provider_id__,
+                provider_resource_id=downstream_model_id,
+                model_type=custom_metadata.get("model_type", "llm"),
+                metadata=custom_metadata,
+            )
+            models.append(model)
+        return models
+    async def should_refresh_models(self) -> bool:
+        """Passthrough should refresh models since they come from downstream dynamically."""
+        return self.config.refresh_models
+    def _get_openai_client(self) -> AsyncOpenAI:
+        """Get an AsyncOpenAI client configured for the downstream server."""
+        base_url = self._get_passthrough_url()
+        api_key = self._get_passthrough_api_key()
+        return AsyncOpenAI(
+            base_url=f"{base_url.rstrip('/')}/v1",
+            api_key=api_key,
         )
-    async def openai_embeddings(
-        self,
-        params: OpenAIEmbeddingsRequestWithExtraBody,
-    ) -> OpenAIEmbeddingsResponse:
-        raise NotImplementedError()
+    def _get_passthrough_url(self) -> str:
+        """Get the passthrough URL from config or provider data."""
+        if self.config.url is not None:
+            return self.config.url
+        provider_data = self.get_request_provider_data()
+        if provider_data is None:
+            raise ValueError(
+                'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": <your passthrough url>}'
+            )
+        return provider_data.passthrough_url
+    def _get_passthrough_api_key(self) -> str:
+        """Get the passthrough API key from config or provider data."""
+        if self.config.auth_credential is not None:
+            return self.config.auth_credential.get_secret_value()
+        provider_data = self.get_request_provider_data()
+        if provider_data is None:
+            raise ValueError(
+                'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": <your api key>}'
+            )
+        return provider_data.passthrough_api_key
     async def openai_completion(
         self,
         params: OpenAICompletionRequestWithExtraBody,
     ) -> OpenAICompletion:
-        client = self._get_client()
-        model_obj = await self.model_store.get_model(params.model)
-        params = params.model_copy()
-        params.model = model_obj.provider_resource_id
+        """Forward completion request to downstream using OpenAI client."""
+        client = self._get_openai_client()
         request_params = params.model_dump(exclude_none=True)
-        return await client.inference.openai_completion(**request_params)
+        response = await client.completions.create(**request_params)
+        return response  # type: ignore
     async def openai_chat_completion(
         self,
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        client = self._get_client()
-        model_obj = await self.model_store.get_model(params.model)
-        params = params.model_copy()
-        params.model = model_obj.provider_resource_id
+        """Forward chat completion request to downstream using OpenAI client."""
+        client = self._get_openai_client()
         request_params = params.model_dump(exclude_none=True)
-        return await client.inference.openai_chat_completion(**request_params)
-    def cast_value_to_json_dict(self, request_params: dict[str, Any]) -> dict[str, Any]:
-        json_params = {}
-        for key, value in request_params.items():
-            json_input = convert_pydantic_to_json_value(value)
-            if isinstance(json_input, dict):
-                json_input = {k: v for k, v in json_input.items() if v is not None}
-            elif isinstance(json_input, list):
-                json_input = [x for x in json_input if x is not None]
-                new_input = []
-                for x in json_input:
-                    if isinstance(x, dict):
-                        x = {k: v for k, v in x.items() if v is not None}
-                    new_input.append(x)
-                json_input = new_input
-            json_params[key] = json_input
-        return json_params
+        response = await client.chat.completions.create(**request_params)
+        return response  # type: ignore
+    async def openai_embeddings(
+        self,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
+    ) -> OpenAIEmbeddingsResponse:
+        """Forward embeddings request to downstream using OpenAI client."""
+        client = self._get_openai_client()
+        request_params = params.model_dump(exclude_none=True)
+        response = await client.embeddings.create(**request_params)
+        return response  # type: ignore
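
Since the rewritten adapter forwards plain OpenAI wire format, the downstream server can be exercised directly with the openai SDK. A minimal sketch of the forwarding path, assuming a vLLM server at localhost:8000 (the address the recordings below were captured against) and that no real API key is enforced:

import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    # Mirrors _get_openai_client(): base_url is "<passthrough_url>/v1"
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")
    response = await client.chat.completions.create(
        model="Qwen/Qwen3-0.6B",
        messages=[{"role": "user", "content": "Say hello."}],
    )
    print(response.choices[0].message.content)


asyncio.run(main())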

View file

@@ -26,6 +26,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreChunkingStrategy,
     VectorStoreChunkingStrategyAuto,
     VectorStoreChunkingStrategyStatic,
+    VectorStoreChunkingStrategyStaticConfig,
     VectorStoreContent,
     VectorStoreDeleteResponse,
     VectorStoreFileBatchObject,
@@ -414,6 +415,10 @@ class OpenAIVectorStoreMixin(ABC):
             in_progress=0,
             total=0,
         )
+        if not params.chunking_strategy or params.chunking_strategy.type == "auto":
+            chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig())
+        else:
+            chunking_strategy = params.chunking_strategy
         store_info: dict[str, Any] = {
             "id": vector_store_id,
             "object": "vector_store",
@@ -426,7 +431,7 @@ class OpenAIVectorStoreMixin(ABC):
             "expires_at": None,
             "last_active_at": created_at,
             "file_ids": [],
-            "chunking_strategy": params.chunking_strategy,
+            "chunking_strategy": chunking_strategy.model_dump(),
         }
         # Add provider information to metadata if provided
@@ -637,7 +642,7 @@ class OpenAIVectorStoreMixin(ABC):
                 break
         return VectorStoreSearchResponsePage(
-            search_query=search_query,
+            search_query=query if isinstance(query, list) else [query],
             data=data,
             has_more=False,  # For simplicity, we don't implement pagination here
             next_page=None,
@@ -647,7 +652,7 @@ class OpenAIVectorStoreMixin(ABC):
            logger.error(f"Error searching vector store {vector_store_id}: {e}")
            # Return empty results on error
            return VectorStoreSearchResponsePage(
-               search_query=search_query,
+               search_query=query if isinstance(query, list) else [query],
                data=[],
                has_more=False,
                next_page=None,
@@ -886,8 +891,8 @@ class OpenAIVectorStoreMixin(ABC):
         # Determine pagination info
         has_more = len(file_objects) > limit
-        first_id = file_objects[0].id if file_objects else None
-        last_id = file_objects[-1].id if file_objects else None
+        first_id = limited_files[0].id if file_objects else None
+        last_id = limited_files[-1].id if file_objects else None
         return VectorStoreListFilesResponse(
             data=limited_files,
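
The chunking-strategy change normalizes an absent or "auto" strategy into an explicit static config before persisting, so stored metadata always serializes concretely. A sketch of that normalization in isolation, using the names imported in the patched file:

from llama_stack.apis.vector_io import (
    VectorStoreChunkingStrategy,
    VectorStoreChunkingStrategyStatic,
    VectorStoreChunkingStrategyStaticConfig,
)


def normalize_chunking_strategy(
    strategy: VectorStoreChunkingStrategy | None,
) -> VectorStoreChunkingStrategy:
    # Missing or "auto" becomes an explicit static config, matching the
    # branch added to OpenAIVectorStoreMixin above.
    if strategy is None or strategy.type == "auto":
        return VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig())
    return strategy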

View file

@@ -2,7 +2,8 @@
     "default": [
         {"suite": "base", "setup": "ollama"},
         {"suite": "vision", "setup": "ollama-vision"},
-        {"suite": "responses", "setup": "gpt"}
+        {"suite": "responses", "setup": "gpt"},
+        {"suite": "base-vllm-subset", "setup": "vllm"}
     ],
     "schedules": {
         "1 0 * * 0": [

View file

@@ -0,0 +1,45 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374291,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-f70298e4ea3e4b4eb7f2cc2deb7a2b01",
"object": "model_permission",
"created": 1762374291,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,84 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestEdgeCases::test_tool_without_schema[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Call the no args tool"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "no_args_tool",
"description": "Tool with no arguments",
"parameters": {
"type": "object",
"properties": {}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-452805c3c859",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user wants me to call the no args tool. Let me check the available functions. There's only one tool provided, which is the no_args_tool with no arguments. Since the user didn't specify any parameters, I should just return the tool call as instructed. I need to make sure the JSON is correctly formatted and within the XML tags. Alright, that's all I need.\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-7a67269afe214c85924c5171612bbdbd",
"function": {
"arguments": "{}",
"name": "no_args_tool"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 101,
"prompt_tokens": 136,
"total_tokens": 237,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,92 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestOpenAICompatibility::test_openai_chat_completion_with_tools[openai_client-txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo?"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get weather information",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "City name"
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-496035259763",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user is asking about the weather in Tokyo. I need to use the get_weather function for that. The function requires the location parameter, which in this case is Tokyo. I should make sure to specify \"Tokyo\" as the location. Let me check if there are any other parameters needed, but no, the function only needs the location. So the tool call should be straightforward. I'll format the JSON correctly inside the tool_call tags.\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-959b557fa67e4134a2391f5d35e5d5ae",
"function": {
"arguments": "{\"location\": \"Tokyo\"}",
"name": "get_weather"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 117,
"prompt_tokens": 158,
"total_tokens": 275,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,92 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestOpenAICompatibility::test_openai_format_preserves_complex_schemas[openai_client-txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Process this data"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "process_data",
"description": "Process structured data",
"parameters": {
"type": "object",
"properties": {
"data": {
"$ref": "#/$defs/DataObject"
}
},
"$defs": {
"DataObject": {
"type": "object",
"properties": {
"values": {
"type": "array",
"items": {
"type": "number"
}
}
}
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-524ead18daad",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user wants me to process the data. Let me check the available tools. There's a function called process_data that takes an object with a 'data' parameter. The data is an array of numbers. But the user hasn't provided any specific data yet. They just said \"Process this data.\" Hmm, maybe they expect me to prompt them for the data first. Wait, maybe there's a misunderstanding. Did they include the data in the conversation history? Let me look back. The user's message is \"Process this data.\" No data provided. Oh, maybe they made a mistake and forgot to include it. I need to ask them to provide the data so I can proceed. Let me confirm if there's any data mentioned. No, the current input is just the instruction. So I should ask the user to supply the data array of numbers to process.\n</think>\n\nPlease provide the structured data you'd like me to process. For example, an array of numbers like `[1, 2, 3]`.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 212,
"prompt_tokens": 180,
"total_tokens": 392,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,98 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_with_tools[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:tool_calling]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"max_tokens": 4096,
"stream": false,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-744052775cf9",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user is asking about the weather in San Francisco, CA. I need to use the get_weather function. The function requires the location parameter, which is provided as San Francisco, CA. I should make sure to format the arguments correctly as a JSON object. Let me check the required parameters again. The location is required, so I can't omit it. I'll structure the tool call with the name \"get_weather\" and the arguments including \"location\": \"San Francisco, CA\". That should get the current weather information for the user.\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-b59dc311dd914d3dbd6d455b122bc39c",
"function": {
"arguments": "{\"location\": \"San Francisco, CA\"}",
"name": "get_weather"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 138,
"prompt_tokens": 185,
"total_tokens": 323,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
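
The tool-calling recordings all share this request shape; for reference, a sketch of the call that would produce the recording above (server address assumed, tool schema copied from the recorded body):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")  # assumed address

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state (both required), e.g. San Francisco, CA.",
                    }
                },
                "required": ["location"],
            },
        },
    }
]

completion = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[
        {"role": "system", "content": "Pretend you are a weather assistant."},
        {"role": "user", "content": "What's the weather like in San Francisco, CA?"},
    ],
    tools=tools,
    tool_choice="auto",
    max_tokens=4096,
)
print(completion.choices[0].message.tool_calls)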

View file

@@ -0,0 +1,67 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_with_tool_choice_none[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:tool_calling]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"max_tokens": 4096,
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-77cf21828360",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user is asking about the weather in San Francisco, CA. I need to check the current weather conditions. But wait, I can't access real-time data. I should mention that I can't provide the current weather forecast and ask them to check a reliable source like the National Weather Service or a weather app. Also, maybe suggest they can provide more details if they need help with something else related to the weather.\n</think>\n\nI'm sorry, but I can't provide real-time weather information. However, you can check the current weather for San Francisco, CA using the National Weather Service (NWS) website, weather apps like Weather.com, or local meteorological services. Let me know if there's anything else I can assist with!",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 154,
"prompt_tokens": 33,
"total_tokens": 187,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,128 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestEdgeCases::test_multiple_tools_with_different_schemas[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Use one of the available tools"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "simple",
"parameters": {
"type": "object",
"properties": {
"x": {
"type": "string"
}
}
}
}
},
{
"type": "function",
"function": {
"name": "complex",
"parameters": {
"type": "object",
"properties": {
"data": {
"$ref": "#/$defs/Complex"
}
},
"$defs": {
"Complex": {
"type": "object",
"properties": {
"nested": {
"type": "array",
"items": {
"type": "number"
}
}
}
}
}
}
}
},
{
"type": "function",
"function": {
"name": "with_output",
"parameters": {
"type": "object",
"properties": {
"input": {
"type": "string"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-853f6a700b98",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, let's see. The user wants me to use one of the available tools. The tools provided are simple, complex, and with_output. The simple function takes an argument 'x' of type string. The complex function requires a 'data' parameter that's an object with a nested array of numbers. The with_output function takes an input string.\n\nThe user's query is about using a tool, but there's no specific function name mentioned. Wait, maybe the user expects me to choose one based on the context. Since the tools are available, but the query is a general instruction, perhaps I should ask for clarification. However, the instructions say to use one of the tools if possible. Since the user hasn't specified a particular function, maybe I should check if there's any implicit function needed. But looking at the tools, none are directly related to the query. The user might need to specify which tool to use. Alternatively, maybe the answer is to call the simple function with an example input. But without more context, it's hard to tell. Wait, maybe the user expects me to choose the simplest one. Let's go with the simple function first. So the tool call would be to the simple function with x set to some value. But the user hasn't provided a specific value. Maybe I should state that the tool requires a value. But according to the instructions, if possible, use one of the tools. Since the user hasn't given a value, perhaps the answer is to call the simple function with an example. But the parameters for the simple function require 'x' which is a string. Maybe the user expects me to proceed without needing more info. So I'll proceed by calling the simple function with x as \"example\".\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-12e2ba0189cf484bb936cbb254a5c32a",
"function": {
"arguments": "{\"x\": \"example\"}",
"name": "simple"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 378,
"prompt_tokens": 265,
"total_tokens": 643,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,114 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestChatCompletionWithTools::test_tool_with_complex_schema[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Book a flight from SFO to JFK for John Doe"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "book_flight",
"description": "Book a flight",
"parameters": {
"type": "object",
"properties": {
"flight": {
"$ref": "#/$defs/FlightInfo"
},
"passenger": {
"$ref": "#/$defs/Passenger"
}
},
"required": [
"flight",
"passenger"
],
"$defs": {
"FlightInfo": {
"type": "object",
"properties": {
"from": {
"type": "string"
},
"to": {
"type": "string"
},
"date": {
"type": "string",
"format": "date"
}
}
},
"Passenger": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"age": {
"type": "integer"
}
}
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-cfb292c0f41d",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user wants to book a flight from SFO to JFK for John Doe. Let me check the tools available. The provided function is book_flight, which requires flight information and a passenger. The parameters needed are flight (as a FlightInfo object) and passenger (with name and age). The user mentioned SFO to JFK, so the flight details are from and to. The passenger's name is John Doe, but the age isn't provided. Wait, the function parameters require the passenger's name and age, but the user only mentioned the name. Maybe the age is missing? But the user didn't specify it, so perhaps I should note that the age is required. However, the function's required parameters are flight and passenger, so even if age is missing, the function can't be called without it. So I need to include both flight info and passenger details. The user's message only gives the name and destination, not the flight details or age. Therefore, I need to ask for the flight details and the passenger's age. But the user hasn't provided those. So I can't proceed with the function call. Wait, but maybe the user expects me to assume some default? No, the function requires all parameters. Since the user hasn't provided flight details or age, I can't call the function. So the correct response is to prompt the user for those details.\n</think>\n\nThe booking requires the flight details and passenger's age. Could you provide the flight number and John Doe's age?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 310,
"prompt_tokens": 261,
"total_tokens": 571,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,96 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_structured_output[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:structured_output]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant. Michael Jordan was born in 1963. His first name is \"Michael\", He played basketball for the Chicago Bulls for 15 seasons and was drafted in 1984"
},
{
"role": "user",
"content": "Please give me information about Michael Jordan."
}
],
"max_tokens": 4096,
"response_format": {
"type": "json_schema",
"json_schema": {
"name": "AnswerFormat",
"schema": {
"properties": {
"first_name": {
"title": "First Name",
"type": "string"
},
"last_name": {
"title": "Last Name",
"type": "string"
},
"year_of_birth": {
"title": "Year Of Birth",
"type": "integer"
}
},
"required": [
"first_name",
"last_name",
"year_of_birth"
],
"title": "AnswerFormat",
"type": "object"
}
}
},
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-df353403c7fb",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "{\"first_name\": \"Michael\", \"last_name\": \"Jordan\", \"year_of_birth\": 1963}",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 28,
"prompt_tokens": 66,
"total_tokens": 94,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
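
The recording above exercises vLLM's structured-output path. A sketch of issuing the same request with the openai SDK (server address assumed, schema copied from the recorded body):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")  # assumed address

schema = {
    "properties": {
        "first_name": {"title": "First Name", "type": "string"},
        "last_name": {"title": "Last Name", "type": "string"},
        "year_of_birth": {"title": "Year Of Birth", "type": "integer"},
    },
    "required": ["first_name", "last_name", "year_of_birth"],
    "title": "AnswerFormat",
    "type": "object",
}

completion = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[{"role": "user", "content": "Please give me information about Michael Jordan."}],
    response_format={"type": "json_schema", "json_schema": {"name": "AnswerFormat", "schema": schema}},
)
print(completion.choices[0].message.content)  # JSON constrained to the schema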

View file

@@ -0,0 +1,92 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestChatCompletionWithTools::test_simple_tool_call[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "What's the weather in San Francisco?"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "City name"
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-e89112e7735f",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user is asking for the weather in San Francisco. I need to check if there's a function available for that. Looking at the tools provided, there's a function called get_weather that requires a location parameter. The description says it gets weather for a location, and the parameter is the city name. The user provided \"San Francisco\" as the location, so I should call the get_weather function with \"San Francisco\" as the argument. I don't see any other parameters needed here, so the tool call should be straightforward. Just make sure the city name is correctly formatted in JSON.\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-feead29842dc40b2831c41ed397f555f",
"function": {
"arguments": "{\"location\": \"San Francisco\"}",
"name": "get_weather"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 146,
"prompt_tokens": 161,
"total_tokens": 307,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,53 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"prompt": "I am feeling really sad today.",
"stream": false,
"extra_body": {
"guided_choice": [
"joy",
"sadness"
]
}
},
"endpoint": "/v1/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "rec-f02f1bfd75ad",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "joy",
"stop_reason": null,
"prompt_logprobs": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 2,
"prompt_tokens": 7,
"total_tokens": 9,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
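
guided_choice is a vLLM-specific extension carried via extra_body, as the recorded request shows. A sketch of reproducing it with the openai SDK (server address assumed):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")  # assumed address

# extra_body forwards vLLM-specific fields the OpenAI SDK doesn't model natively.
completion = client.completions.create(
    model="Qwen/Qwen3-0.6B",
    prompt="I am feeling really sad today.",
    extra_body={"guided_choice": ["joy", "sadness"]},
)
print(completion.choices[0].text)  # output constrained to "joy" or "sadness"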

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375180,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-aeeb49e5e51c42fa94562780165bd620",
"object": "model_permission",
"created": 1762375180,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375115,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-feec0a894be04f738e12b596ff163b64",
"object": "model_permission",
"created": 1762375115,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_stop_sequence[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:stop_sequence]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374330,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-119e17052e4c4c13bd791af3138d5360",
"object": "model_permission",
"created": 1762374330,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375226,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-c6ae673fda084519b3c67947896cd3b0",
"object": "model_permission",
"created": 1762375226,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374573,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-3f422354a81e491b87f93d5b192a0e1a",
"object": "model_permission",
"created": 1762374573,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374305,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-794e16e59ddb4216a8bedfdf485b8f24",
"object": "model_permission",
"created": 1762374305,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=vllm/Qwen/Qwen3-0.6B-True]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374317,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-ff7d26d076eb4373a0631a80fe3ae063",
"object": "model_permission",
"created": 1762374317,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375033,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-2a16fede981b43be9e1cbe3dbedd1e74",
"object": "model_permission",
"created": 1762375033,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374297,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-4bc93704559a4e1d8492aeec7222040c",
"object": "model_permission",
"created": 1762374297,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374532,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-e353aa079d5145c19953791ac99daeba",
"object": "model_permission",
"created": 1762374532,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-True]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375260,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-10c27d1c9e324b18b65321b422e19af9",
"object": "model_permission",
"created": 1762375260,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375040,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-f01c211577294936958dd28046c89dba",
"object": "model_permission",
"created": 1762375040,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_02]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375266,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-7166a6fcd331435eb2d0f0a6b23382ed",
"object": "model_permission",
"created": 1762375266,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374301,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-cd16b092c5a04e719ddf786f0c3e935e",
"object": "model_permission",
"created": 1762374301,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:sanity]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374295,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-9f71adbb206846bb9d0e12834e41551e",
"object": "model_permission",
"created": 1762374295,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_logprobs_streaming[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:log_probs]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374342,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-a8b7b38c40584a03b4b346b6c181fb93",
"object": "model_permission",
"created": 1762374342,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_01]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375235,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-dd48560646f141298f5cc2ef3467e54b",
"object": "model_permission",
"created": 1762375235,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374500,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-0ba0c3a54dcb4e57bc0308fd54425933",
"object": "model_permission",
"created": 1762374500,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=vllm/Qwen/Qwen3-0.6B-True]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374311,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-e95a9ed7439245b5995add97fb50f765",
"object": "model_permission",
"created": 1762374311,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375099,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-6b5eba46536f43df902871dd257e1676",
"object": "model_permission",
"created": 1762375099,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375207,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-bbfbcf20cac146e0ae5e45ae6a42632d",
"object": "model_permission",
"created": 1762375207,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375273,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-4935d35e00fd4acdbe78662f42342e77",
"object": "model_permission",
"created": 1762375273,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374591,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-e19031997a1e44d99c8b5ae55725a887",
"object": "model_permission",
"created": 1762374591,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_02]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375027,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-ec44b40a73b04912a837001376b59cff",
"object": "model_permission",
"created": 1762375027,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374356,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-3203232f1dbd426aba98ef1593dd3c01",
"object": "model_permission",
"created": 1762374356,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375248,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-5efe67a621074e979edaaf8fcfee9a80",
"object": "model_permission",
"created": 1762375248,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375135,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-5509cf924e5e4fc89091e4593f264258",
"object": "model_permission",
"created": 1762375135,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_01]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374301,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-72ed55b56df1471b9f71c48bacf8b768",
"object": "model_permission",
"created": 1762374301,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming_suffix[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:suffix]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374295,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-abbbfbb49abc4312b2b2011d4d2ba19b",
"object": "model_permission",
"created": 1762374295,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375065,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-d943fbda14264715906334300853cec7",
"object": "model_permission",
"created": 1762375065,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming_with_file[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374323,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-15a0a1106fff4fdd8ce7574373fe3cee",
"object": "model_permission",
"created": 1762374323,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375082,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-ef1d3bc6fefc432380ef0eabdf216fd3",
"object": "model_permission",
"created": 1762375082,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375165,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-b29f7386725b4f13976cd76b6dc3a278",
"object": "model_permission",
"created": 1762375165,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_logprobs[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:log_probs]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374336,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-21db8cc1a31e41eaaa4e653435618645",
"object": "model_permission",
"created": 1762374336,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374547,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-09fed2c5660e42658ab23c6d17b7840c",
"object": "model_permission",
"created": 1762374547,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_streaming[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:sanity]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374297,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-70d68a901d2445f6b7f470c600b34c78",
"object": "model_permission",
"created": 1762374297,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=vllm/Qwen/Qwen3-0.6B-False]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375047,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-a8446fd6718649399402526dc6fe1477",
"object": "model_permission",
"created": 1762375047,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-True]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375254,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-f6eb51901e6443e492061deac904737c",
"object": "model_permission",
"created": 1762375254,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375279,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-31e50ba39ad84a7daa1a24a3c77dc550",
"object": "model_permission",
"created": 1762375279,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375241,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-3ebcc379347541ea94de0f91838829e5",
"object": "model_permission",
"created": 1762375241,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374449,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-311f880045284a469a286b8039177d10",
"object": "model_permission",
"created": 1762374449,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=vllm/Qwen/Qwen3-0.6B-False]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375053,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-2e52800baf7e4d3389892f33feb3f52b",
"object": "model_permission",
"created": 1762375053,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375150,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-cfec81fed838407597a92838017f3ef5",
"object": "model_permission",
"created": 1762375150,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374466,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-8811d359d9724f8cac7fd6df608f69bd",
"object": "model_permission",
"created": 1762374466,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374482,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-d4b4923adfdf40b7bd7698aa798e68eb",
"object": "model_permission",
"created": 1762374482,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-False]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375291,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-a48cfd65bcd847d7aea01d44e8add51e",
"object": "model_permission",
"created": 1762375291,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374517,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-916d53706b624fefb83e5dcc699e7a69",
"object": "model_permission",
"created": 1762374517,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-False]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375285,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-e0640be42b814b3394545ebe92d844b3",
"object": "model_permission",
"created": 1762375285,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -78,7 +78,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
             "VLLM_URL": "http://localhost:8000/v1",
         },
         defaults={
-            "text_model": "vllm/meta-llama/Llama-3.2-1B-Instruct",
+            "text_model": "vllm/Qwen/Qwen3-0.6B",
             "embedding_model": "sentence-transformers/nomic-embed-text-v1.5",
         },
     ),
@ -169,6 +169,11 @@ SUITE_DEFINITIONS: dict[str, Suite] = {
         roots=base_roots,
         default_setup="ollama",
     ),
+    "base-vllm-subset": Suite(
+        name="base-vllm-subset",
+        roots=["tests/integration/inference"],
+        default_setup="vllm",
+    ),
     "responses": Suite(
         name="responses",
         roots=["tests/integration/responses"],

View file

@ -350,7 +350,7 @@ def test_openai_vector_store_search_empty(
     assert search_response is not None
     assert hasattr(search_response, "data")
     assert len(search_response.data) == 0  # Empty store should return no results
-    assert search_response.search_query == "test query"
+    assert search_response.search_query == ["test query"]
     assert search_response.has_more is False
@ -679,7 +679,7 @@ def test_openai_vector_store_attach_file(
     assert file_attach_response.id == file.id
     assert file_attach_response.vector_store_id == vector_store.id
     assert file_attach_response.status == "completed"
-    assert file_attach_response.chunking_strategy.type == "auto"
+    assert file_attach_response.chunking_strategy.type == "static"
     assert file_attach_response.created_at > 0
     assert not file_attach_response.last_error
@ -815,8 +815,8 @@ def test_openai_vector_store_list_files(
     assert set(file_ids) == {file.id for file in files_list.data}
     assert files_list.data[0].object == "vector_store.file"
     assert files_list.data[0].vector_store_id == vector_store.id
-    assert files_list.data[0].status == "completed"
-    assert files_list.data[0].chunking_strategy.type == "auto"
+    assert files_list.data[0].status in ["completed", "in_progress"]
+    assert files_list.data[0].chunking_strategy.type == "static"
     assert files_list.data[0].created_at > 0
     assert files_list.first_id == files_list.data[0].id
     assert not files_list.data[0].last_error
@ -825,7 +825,7 @@ def test_openai_vector_store_list_files(
     assert first_page.has_more
     assert len(first_page.data) == 2
     assert first_page.first_id == first_page.data[0].id
-    assert first_page.last_id != first_page.data[-1].id
+    assert first_page.last_id == first_page.data[-1].id
     next_page = compat_client.vector_stores.files.list(
         vector_store_id=vector_store.id, limit=2, after=first_page.data[-1].id