diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml index ec4d7f977..d44cba4ee 100644 --- a/.github/actions/run-and-record-tests/action.yml +++ b/.github/actions/run-and-record-tests/action.yml @@ -72,7 +72,8 @@ runs: echo "New recordings detected, committing and pushing" git add tests/integration/ - git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})" + git commit -m "Recordings update from CI (setup: ${{ inputs.setup }}, suite: ${{ inputs.suite }})" + git fetch origin ${{ github.ref_name }} git rebase origin/${{ github.ref_name }} echo "Rebased successfully" @@ -88,6 +89,8 @@ runs: run: | # Ollama logs (if ollama container exists) sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true + # vllm logs (if vllm container exists) + sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true # Note: distro container logs are now dumped in integration-tests.sh before container is removed - name: Upload logs diff --git a/.github/actions/setup-vllm/action.yml b/.github/actions/setup-vllm/action.yml index 17ebd42f2..34ced0998 100644 --- a/.github/actions/setup-vllm/action.yml +++ b/.github/actions/setup-vllm/action.yml @@ -11,13 +11,14 @@ runs: --name vllm \ -p 8000:8000 \ --privileged=true \ - quay.io/higginsd/vllm-cpu:65393ee064 \ + quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \ --host 0.0.0.0 \ --port 8000 \ --enable-auto-tool-choice \ - --tool-call-parser llama3_json \ - --model /root/.cache/Llama-3.2-1B-Instruct \ - --served-model-name meta-llama/Llama-3.2-1B-Instruct + --tool-call-parser hermes \ + --model /root/.cache/Qwen3-0.6B \ + --served-model-name Qwen/Qwen3-0.6B \ + --max-model-len 8192 # Wait for vllm to be ready echo "Waiting for vllm to be ready..." 
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 41822fa79..2c797e906 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -27,7 +27,6 @@ on: schedule: # If changing the cron schedule, update the provider in the test-matrix job - cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC - - cron: '1 0 * * 0' # (test vllm) Weekly on Sunday at 1 AM UTC workflow_dispatch: inputs: test-all-client-versions: diff --git a/client-sdks/stainless/config-not-source-of-truth-yet.yml b/client-sdks/stainless/config-not-source-of-truth-yet.yml index 6cd526c0f..23be00d74 100644 --- a/client-sdks/stainless/config-not-source-of-truth-yet.yml +++ b/client-sdks/stainless/config-not-source-of-truth-yet.yml @@ -115,9 +115,6 @@ resources: sampling_params: SamplingParams scoring_result: ScoringResult system_message: SystemMessage - query_result: RAGQueryResult - document: RAGDocument - query_config: RAGQueryConfig toolgroups: models: tool_group: ToolGroup @@ -143,11 +140,6 @@ resources: endpoint: get /v1/tool-runtime/list-tools paginated: false invoke_tool: post /v1/tool-runtime/invoke - subresources: - rag_tool: - methods: - insert: post /v1/tool-runtime/rag-tool/insert - query: post /v1/tool-runtime/rag-tool/query responses: models: diff --git a/docs/docs/building_applications/index.mdx b/docs/docs/building_applications/index.mdx index a4b71efd7..935a02f8a 100644 --- a/docs/docs/building_applications/index.mdx +++ b/docs/docs/building_applications/index.mdx @@ -35,9 +35,6 @@ Here are the key topics that will help you build effective AI applications: - **[Telemetry](./telemetry.mdx)** - Monitor and analyze your agents' performance and behavior - **[Safety](./safety.mdx)** - Implement guardrails and safety measures to ensure responsible AI behavior -### 🎮 **Interactive Development** -- **[Playground](./playground.mdx)** - Interactive environment for testing and developing applications - 
## Application Patterns ### 🤖 **Conversational Agents** diff --git a/docs/docs/building_applications/playground.mdx b/docs/docs/building_applications/playground.mdx deleted file mode 100644 index f3290a356..000000000 --- a/docs/docs/building_applications/playground.mdx +++ /dev/null @@ -1,298 +0,0 @@ ---- -title: Llama Stack Playground -description: Interactive interface to explore and experiment with Llama Stack capabilities -sidebar_label: Playground -sidebar_position: 10 ---- - -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; - -# Llama Stack Playground - -:::note[Experimental Feature] -The Llama Stack Playground is currently experimental and subject to change. We welcome feedback and contributions to help improve it. -::: - -The Llama Stack Playground is a simple interface that aims to: -- **Showcase capabilities and concepts** of Llama Stack in an interactive environment -- **Demo end-to-end application code** to help users get started building their own applications -- **Provide a UI** to help users inspect and understand Llama Stack API providers and resources - -## Key Features - -### Interactive Playground Pages - -The playground provides interactive pages for users to explore Llama Stack API capabilities: - -#### Chatbot Interface - - - - - - -**Simple Chat Interface** -- Chat directly with Llama models through an intuitive interface -- Uses the `/chat/completions` streaming API under the hood -- Real-time message streaming for responsive interactions -- Perfect for testing model capabilities and prompt engineering - - - - -**Document-Aware Conversations** -- Upload documents to create memory banks -- Chat with a RAG-enabled agent that can query your documents -- Uses Llama Stack's `/agents` API to create and manage RAG sessions -- Ideal for exploring knowledge-enhanced AI applications - - - - -#### Evaluation Interface - - - - - - -**Custom Dataset Evaluation** -- Upload your own evaluation datasets -- Run evaluations using 
available scoring functions -- Uses Llama Stack's `/scoring` API for flexible evaluation workflows -- Great for testing application performance on custom metrics - - - - - - -**Pre-registered Evaluation Tasks** -- Evaluate models or agents on pre-defined tasks -- Uses Llama Stack's `/eval` API for comprehensive evaluation -- Combines datasets and scoring functions for standardized testing - -**Setup Requirements:** -Register evaluation datasets and benchmarks first: - -```bash -# Register evaluation dataset -llama-stack-client datasets register \ - --dataset-id "mmlu" \ - --provider-id "huggingface" \ - --url "https://huggingface.co/datasets/llamastack/evals" \ - --metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \ - --schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string"}, "chat_completion_input": {"type": "string"}}' - -# Register benchmark task -llama-stack-client benchmarks register \ - --eval-task-id meta-reference-mmlu \ - --provider-id meta-reference \ - --dataset-id mmlu \ - --scoring-functions basic::regex_parser_multiple_choice_answer -``` - - - - -#### Inspection Interface - - - - - - -**Provider Management** -- Inspect available Llama Stack API providers -- View provider configurations and capabilities -- Uses the `/providers` API for real-time provider information -- Essential for understanding your deployment's capabilities - - - - -**Resource Exploration** -- Inspect Llama Stack API resources including: - - **Models**: Available language models - - **Datasets**: Registered evaluation datasets - - **Memory Banks**: Vector databases and knowledge stores - - **Benchmarks**: Evaluation tasks and scoring functions - - **Shields**: Safety and content moderation tools -- Uses `//list` APIs for comprehensive resource visibility -- For detailed information about resources, see [Core Concepts](/docs/concepts) - - - - -## Getting Started - -### Quick Start Guide - - - - -**1. 
Start the Llama Stack API Server** - -```bash -llama stack list-deps together | xargs -L1 uv pip install -llama stack run together -``` - -**2. Start the Streamlit UI** - -```bash -# Launch the playground interface -uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py -``` - - - - -**Making the Most of the Playground:** - -- **Start with Chat**: Test basic model interactions and prompt engineering -- **Explore RAG**: Upload sample documents to see knowledge-enhanced responses -- **Try Evaluations**: Use the scoring interface to understand evaluation metrics -- **Inspect Resources**: Check what providers and resources are available -- **Experiment with Settings**: Adjust parameters to see how they affect results - - - - -### Available Distributions - -The playground works with any Llama Stack distribution. Popular options include: - - - - -```bash -llama stack list-deps together | xargs -L1 uv pip install -llama stack run together -``` - -**Features:** -- Cloud-hosted models -- Fast inference -- Multiple model options - - - - -```bash -llama stack list-deps ollama | xargs -L1 uv pip install -llama stack run ollama -``` - -**Features:** -- Local model execution -- Privacy-focused -- No internet required - - - - -```bash -llama stack list-deps meta-reference | xargs -L1 uv pip install -llama stack run meta-reference -``` - -**Features:** -- Reference implementation -- All API features available -- Best for development - - - - -## Use Cases & Examples - -### Educational Use Cases -- **Learning Llama Stack**: Hands-on exploration of API capabilities -- **Prompt Engineering**: Interactive testing of different prompting strategies -- **RAG Experimentation**: Understanding how document retrieval affects responses -- **Evaluation Understanding**: See how different metrics evaluate model performance - -### Development Use Cases -- **Prototype Testing**: Quick validation of application concepts -- **API Exploration**: Understanding available endpoints and parameters 
-- **Integration Planning**: Seeing how different components work together -- **Demo Creation**: Showcasing Llama Stack capabilities to stakeholders - -### Research Use Cases -- **Model Comparison**: Side-by-side testing of different models -- **Evaluation Design**: Understanding how scoring functions work -- **Safety Testing**: Exploring shield effectiveness with different inputs -- **Performance Analysis**: Measuring model behavior across different scenarios - -## Best Practices - -### 🚀 **Getting Started** -- Begin with simple chat interactions to understand basic functionality -- Gradually explore more advanced features like RAG and evaluations -- Use the inspection tools to understand your deployment's capabilities - -### 🔧 **Development Workflow** -- Use the playground to prototype before writing application code -- Test different parameter settings interactively -- Validate evaluation approaches before implementing them programmatically - -### 📊 **Evaluation & Testing** -- Start with simple scoring functions before trying complex evaluations -- Use the playground to understand evaluation results before automation -- Test safety features with various input types - -### 🎯 **Production Preparation** -- Use playground insights to inform your production API usage -- Test edge cases and error conditions interactively -- Validate resource configurations before deployment - -## Related Resources - -- **[Getting Started Guide](../getting_started/quickstart)** - Complete setup and introduction -- **[Core Concepts](/docs/concepts)** - Understanding Llama Stack fundamentals -- **[Agents](./agent)** - Building intelligent agents -- **[RAG (Retrieval Augmented Generation)](./rag)** - Knowledge-enhanced applications -- **[Evaluations](./evals)** - Comprehensive evaluation framework -- **[API Reference](/docs/api/llama-stack-specification)** - Complete API documentation diff --git a/docs/docs/distributions/index.mdx b/docs/docs/distributions/index.mdx index 
0149f143f..ebf4bd6ce 100644 --- a/docs/docs/distributions/index.mdx +++ b/docs/docs/distributions/index.mdx @@ -19,3 +19,4 @@ This section provides an overview of the distributions available in Llama Stack. - **[Starting Llama Stack Server](./starting_llama_stack_server.mdx)** - How to run distributions - **[Importing as Library](./importing_as_library.mdx)** - Use distributions in your code - **[Configuration Reference](./configuration.mdx)** - Configuration file format details +- **[Llama Stack UI](./llama_stack_ui.mdx)** - Web-based user interface for interacting with Llama Stack servers diff --git a/docs/docs/distributions/llama_stack_ui.mdx b/docs/docs/distributions/llama_stack_ui.mdx new file mode 100644 index 000000000..7ba47ea4d --- /dev/null +++ b/docs/docs/distributions/llama_stack_ui.mdx @@ -0,0 +1,109 @@ +--- +title: Llama Stack UI +description: Web-based user interface for interacting with Llama Stack servers +sidebar_label: Llama Stack UI +sidebar_position: 8 +--- + +# Llama Stack UI + +The Llama Stack UI is a web-based interface for interacting with Llama Stack servers. Built with Next.js and React, it provides a visual way to work with agents, manage resources, and view logs. + +## Features + +- **Logs & Monitoring**: View chat completions, agent responses, and vector store activity +- **Vector Stores**: Create and manage vector databases for RAG (Retrieval-Augmented Generation) workflows +- **Prompt Management**: Create and manage reusable prompts + +## Prerequisites + +You need a running Llama Stack server. The UI is a client that connects to the Llama Stack backend. + +If you don't have a Llama Stack server running yet, see the [Starting Llama Stack Server](./starting_llama_stack_server.mdx) guide. 
+ +## Running the UI + +### Option 1: Using npx (Recommended for Quick Start) + +The fastest way to get started is using `npx`: + +```bash +npx llama-stack-ui +``` + +This will start the UI server on `http://localhost:8322` (default port). + +### Option 2: Using Docker + +Run the UI in a container: + +```bash +docker run -p 8322:8322 llamastack/ui +``` + +Access the UI at `http://localhost:8322`. + +## Environment Variables + +The UI can be configured using the following environment variables: + +| Variable | Description | Default | +|----------|-------------|---------| +| `LLAMA_STACK_BACKEND_URL` | URL of your Llama Stack server | `http://localhost:8321` | +| `LLAMA_STACK_UI_PORT` | Port for the UI server | `8322` | + +If the Llama Stack server is running with authentication enabled, you can configure the UI to use it by setting the following environment variables: + +| Variable | Description | Default | +|----------|-------------|---------| +| `NEXTAUTH_URL` | NextAuth URL for authentication | `http://localhost:8322` | +| `GITHUB_CLIENT_ID` | GitHub OAuth client ID (optional, for authentication) | - | +| `GITHUB_CLIENT_SECRET` | GitHub OAuth client secret (optional, for authentication) | - | + +### Setting Environment Variables + +#### For npx: + +```bash +LLAMA_STACK_BACKEND_URL=http://localhost:8321 \ +LLAMA_STACK_UI_PORT=8080 \ +npx llama-stack-ui +``` + +#### For Docker: + +```bash +docker run -p 8080:8080 \ + -e LLAMA_STACK_BACKEND_URL=http://localhost:8321 \ + -e LLAMA_STACK_UI_PORT=8080 \ + llamastack/ui +``` + +## Using the UI + +### Managing Resources + +- **Vector Stores**: Create vector databases for RAG workflows, view stored documents and embeddings +- **Prompts**: Create and manage reusable prompt templates +- **Chat Completions**: View history of chat interactions +- **Responses**: Browse detailed agent responses and tool calls + +## Development + +If you want to run the UI from source for development: + +```bash +# From the project root +cd 
src/llama_stack_ui + +# Install dependencies +npm install + +# Set environment variables +export LLAMA_STACK_BACKEND_URL=http://localhost:8321 + +# Start the development server +npm run dev +``` + +The development server will start on `http://localhost:8322` with hot reloading enabled. diff --git a/docs/docs/distributions/self_hosted_distro/starter.md b/docs/docs/distributions/self_hosted_distro/starter.md index f6786a95c..84c35f3d3 100644 --- a/docs/docs/distributions/self_hosted_distro/starter.md +++ b/docs/docs/distributions/self_hosted_distro/starter.md @@ -163,7 +163,41 @@ docker run \ --port $LLAMA_STACK_PORT ``` -### Via venv +The container will run the distribution with a SQLite store by default. This store is used for the following components: + +- Metadata store: store metadata about the models, providers, etc. +- Inference store: collection of responses from the inference provider +- Agents store: store agent configurations (sessions, turns, etc.) +- Agents Responses store: store responses from the agents + +However, you can use PostgreSQL instead by running the `starter::run-with-postgres-store.yaml` configuration: + +```bash +docker run \ + -it \ + --pull always \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -e OPENAI_API_KEY=your_openai_key \ + -e FIREWORKS_API_KEY=your_fireworks_key \ + -e TOGETHER_API_KEY=your_together_key \ + -e POSTGRES_HOST=your_postgres_host \ + -e POSTGRES_PORT=your_postgres_port \ + -e POSTGRES_DB=your_postgres_db \ + -e POSTGRES_USER=your_postgres_user \ + -e POSTGRES_PASSWORD=your_postgres_password \ + llamastack/distribution-starter \ + starter::run-with-postgres-store.yaml +``` + +Postgres environment variables: + +- `POSTGRES_HOST`: Postgres host (default: `localhost`) +- `POSTGRES_PORT`: Postgres port (default: `5432`) +- `POSTGRES_DB`: Postgres database name (default: `llamastack`) +- `POSTGRES_USER`: Postgres username (default: `llamastack`) +- `POSTGRES_PASSWORD`: Postgres password (default: `llamastack`) + +### Via
Conda or venv Ensure you have configured the starter distribution using the environment variables explained above. @@ -171,8 +205,11 @@ Ensure you have configured the starter distribution using the environment variab # Install dependencies for the starter distribution uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install -# Run the server +# Run the server (with SQLite - default) uv run --with llama-stack llama stack run starter + +# Or run with PostgreSQL +uv run --with llama-stack llama stack run starter::run-with-postgres-store.yaml ``` ## Example Usage diff --git a/docs/docs/providers/inference/remote_passthrough.mdx b/docs/docs/providers/inference/remote_passthrough.mdx index 7a2931690..957cd04da 100644 --- a/docs/docs/providers/inference/remote_passthrough.mdx +++ b/docs/docs/providers/inference/remote_passthrough.mdx @@ -16,7 +16,7 @@ Passthrough inference provider for connecting to any external inference service |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| | `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | | `url` | `` | No | | The URL for the passthrough endpoint | ## Sample Configuration diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 641c2eed3..7b4ac5ac8 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -57,6 +57,7 @@ const sidebars: SidebarsConfig = { 'distributions/importing_as_library', 'distributions/configuration', 'distributions/starting_llama_stack_server', + 'distributions/llama_stack_ui', { type: 'category', label: 'Self-Hosted Distributions', diff --git a/pyproject.toml b/pyproject.toml index 8f07f9cbd..f8577ad2b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,14 +51,6 @@ dependencies = [ "sqlalchemy[asyncio]>=2.0.41", # server - for conversations ] -[project.optional-dependencies] -ui = [ - "streamlit", - "pandas", - "llama-stack-client>=0.3.0", - "streamlit-option-menu", -] - [dependency-groups] dev = [ "pytest>=8.4", diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index 372e97d8c..0951feb14 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -231,7 +231,8 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then # Use a fixed port for the OTEL collector so the server can connect to it COLLECTOR_PORT=4317 export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}" - export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}" + # Disabled: https://github.com/llamastack/llama-stack/issues/4089 + #export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}" export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf" export OTEL_BSP_SCHEDULE_DELAY="200" export OTEL_BSP_EXPORT_TIMEOUT="2000" @@ -337,7 +338,8 @@ if [[ "$STACK_CONFIG" == *"docker:"* && 
"$COLLECT_ONLY" == false ]]; then DOCKER_ENV_VARS="" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server" - DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}" + # Disabled: https://github.com/llamastack/llama-stack/issues/4089 + #DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_METRIC_EXPORT_INTERVAL=200" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_SCHEDULE_DELAY=200" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_EXPORT_TIMEOUT=2000" @@ -353,6 +355,10 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then [ -n "${OLLAMA_URL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL" [ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL" + if [[ "$TEST_SETUP" == "vllm" ]]; then + DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e VLLM_URL=http://localhost:8000/v1" + fi + # Determine the actual image name (may have localhost/ prefix) IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1) if [[ -z "$IMAGE_NAME" ]]; then @@ -405,11 +411,6 @@ fi echo "=== Running Integration Tests ===" EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag" -# Additional exclusions for vllm setup -if [[ "$TEST_SETUP" == "vllm" ]]; then - EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls" -fi - PYTEST_PATTERN="not( $EXCLUDE_TESTS )" if [[ -n "$TEST_PATTERN" ]]; then PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN" diff --git a/src/llama_stack/apis/common/responses.py b/src/llama_stack/apis/common/responses.py index 616bee73a..53a290eea 100644 --- a/src/llama_stack/apis/common/responses.py +++ b/src/llama_stack/apis/common/responses.py @@ -34,3 +34,44 @@ class 
PaginatedResponse(BaseModel): data: list[dict[str, Any]] has_more: bool url: str | None = None + + +# This is a short term solution to allow inference API to return metrics +# The ideal way to do this is to have a way for all response types to include metrics +# and all metric events logged to the telemetry API to be included with the response +# To do this, we will need to augment all response types with a metrics field. +# We have hit a blocker from stainless SDK that prevents us from doing this. +# The blocker is that if we were to augment the response types that have a data field +# in them like so +# class ListModelsResponse(BaseModel): +# metrics: Optional[List[MetricEvent]] = None +# data: List[Models] +# ... +# The client SDK will need to access the data by using a .data field, which is not +# ergonomic. Stainless SDK does support unwrapping the response type, but it +# requires that the response type to only have a single field. + +# We will need a way in the client SDK to signal that the metrics are needed +# and if they are needed, the client SDK has to return the full response type +# without unwrapping it. + + +@json_schema_type +class MetricInResponse(BaseModel): + """A metric value included in API responses. + :param metric: The name of the metric + :param value: The numeric value of the metric + :param unit: (Optional) The unit of measurement for the metric value + """ + + metric: str + value: int | float + unit: str | None = None + + +class MetricResponseMixin(BaseModel): + """Mixin class for API responses that can include metrics. + :param metrics: (Optional) List of metrics associated with the API response + """ + + metrics: list[MetricInResponse] | None = None diff --git a/src/llama_stack/apis/common/tracing.py b/src/llama_stack/apis/common/tracing.py new file mode 100644 index 000000000..830c2945a --- /dev/null +++ b/src/llama_stack/apis/common/tracing.py @@ -0,0 +1,22 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + + +def telemetry_traceable(cls): + """ + Mark a protocol for automatic tracing when telemetry is enabled. + + This is a metadata-only decorator with no dependencies on core. + Actual tracing is applied by core routers at runtime if telemetry is enabled. + + Usage: + @runtime_checkable + @telemetry_traceable + class MyProtocol(Protocol): + ... + """ + cls.__marked_for_tracing__ = True + return cls diff --git a/src/llama_stack/apis/conversations/conversations.py b/src/llama_stack/apis/conversations/conversations.py index 6ec7e67d6..3fdd3b47e 100644 --- a/src/llama_stack/apis/conversations/conversations.py +++ b/src/llama_stack/apis/conversations/conversations.py @@ -20,8 +20,8 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageMCPListTools, OpenAIResponseOutputMessageWebSearchToolCall, ) +from llama_stack.apis.common.tracing import telemetry_traceable from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.core.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, register_schema, webmethod Metadata = dict[str, str] @@ -157,7 +157,7 @@ class ConversationItemDeletedResource(BaseModel): @runtime_checkable -@trace_protocol +@telemetry_traceable class Conversations(Protocol): """Conversations diff --git a/src/llama_stack/apis/files/files.py b/src/llama_stack/apis/files/files.py index 657e9f500..f0ea2f892 100644 --- a/src/llama_stack/apis/files/files.py +++ b/src/llama_stack/apis/files/files.py @@ -11,8 +11,8 @@ from fastapi import File, Form, Response, UploadFile from pydantic import BaseModel, Field from llama_stack.apis.common.responses import Order +from llama_stack.apis.common.tracing import telemetry_traceable from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.core.telemetry.trace_protocol 
import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod @@ -102,7 +102,7 @@ class OpenAIFileDeleteResponse(BaseModel): @runtime_checkable -@trace_protocol +@telemetry_traceable class Files(Protocol): """Files diff --git a/src/llama_stack/apis/inference/inference.py b/src/llama_stack/apis/inference/inference.py index f39957190..1a865ce5f 100644 --- a/src/llama_stack/apis/inference/inference.py +++ b/src/llama_stack/apis/inference/inference.py @@ -19,11 +19,10 @@ from pydantic import BaseModel, Field, field_validator from typing_extensions import TypedDict from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent -from llama_stack.apis.common.responses import Order +from llama_stack.apis.common.responses import MetricResponseMixin, Order +from llama_stack.apis.common.tracing import telemetry_traceable from llama_stack.apis.models import Model from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA -from llama_stack.core.telemetry.telemetry import MetricResponseMixin -from llama_stack.core.telemetry.trace_protocol import trace_protocol from llama_stack.models.llama.datatypes import ( BuiltinTool, StopReason, @@ -1160,7 +1159,7 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"): @runtime_checkable -@trace_protocol +@telemetry_traceable class InferenceProvider(Protocol): """ This protocol defines the interface that should be implemented by all inference providers. 
diff --git a/src/llama_stack/apis/models/models.py b/src/llama_stack/apis/models/models.py index 552f47c30..5c976886c 100644 --- a/src/llama_stack/apis/models/models.py +++ b/src/llama_stack/apis/models/models.py @@ -9,9 +9,9 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, ConfigDict, Field, field_validator +from llama_stack.apis.common.tracing import telemetry_traceable from llama_stack.apis.resource import Resource, ResourceType from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.core.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod @@ -105,7 +105,7 @@ class OpenAIListModelsResponse(BaseModel): @runtime_checkable -@trace_protocol +@telemetry_traceable class Models(Protocol): async def list_models(self) -> ListModelsResponse: """List all models. diff --git a/src/llama_stack/apis/prompts/prompts.py b/src/llama_stack/apis/prompts/prompts.py index 4651b9294..406ae529c 100644 --- a/src/llama_stack/apis/prompts/prompts.py +++ b/src/llama_stack/apis/prompts/prompts.py @@ -10,8 +10,8 @@ from typing import Protocol, runtime_checkable from pydantic import BaseModel, Field, field_validator, model_validator +from llama_stack.apis.common.tracing import telemetry_traceable from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.core.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod @@ -92,7 +92,7 @@ class ListPromptsResponse(BaseModel): @runtime_checkable -@trace_protocol +@telemetry_traceable class Prompts(Protocol): """Prompts diff --git a/src/llama_stack/apis/safety/safety.py b/src/llama_stack/apis/safety/safety.py index 97fffcff1..8872cc518 100644 --- a/src/llama_stack/apis/safety/safety.py +++ b/src/llama_stack/apis/safety/safety.py @@ -9,10 +9,10 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel, Field +from 
llama_stack.apis.common.tracing import telemetry_traceable from llama_stack.apis.inference import OpenAIMessageParam from llama_stack.apis.shields import Shield from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.core.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod @@ -94,7 +94,7 @@ class ShieldStore(Protocol): @runtime_checkable -@trace_protocol +@telemetry_traceable class Safety(Protocol): """Safety diff --git a/src/llama_stack/apis/shields/shields.py b/src/llama_stack/apis/shields/shields.py index 565e1db15..ca4483828 100644 --- a/src/llama_stack/apis/shields/shields.py +++ b/src/llama_stack/apis/shields/shields.py @@ -8,9 +8,9 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel +from llama_stack.apis.common.tracing import telemetry_traceable from llama_stack.apis.resource import Resource, ResourceType from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.core.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod @@ -48,7 +48,7 @@ class ListShieldsResponse(BaseModel): @runtime_checkable -@trace_protocol +@telemetry_traceable class Shields(Protocol): @webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1) async def list_shields(self) -> ListShieldsResponse: diff --git a/src/llama_stack/apis/tools/tools.py b/src/llama_stack/apis/tools/tools.py index 29065a713..c9bdfcfb6 100644 --- a/src/llama_stack/apis/tools/tools.py +++ b/src/llama_stack/apis/tools/tools.py @@ -11,9 +11,9 @@ from pydantic import BaseModel from typing_extensions import runtime_checkable from llama_stack.apis.common.content_types import URL, InterleavedContent +from llama_stack.apis.common.tracing import telemetry_traceable from llama_stack.apis.resource import Resource, ResourceType from llama_stack.apis.version import LLAMA_STACK_API_V1 -from 
llama_stack.core.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod @@ -107,7 +107,7 @@ class ListToolDefsResponse(BaseModel): @runtime_checkable -@trace_protocol +@telemetry_traceable class ToolGroups(Protocol): @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1) async def register_tool_group( @@ -189,7 +189,7 @@ class SpecialToolGroup(Enum): @runtime_checkable -@trace_protocol +@telemetry_traceable class ToolRuntime(Protocol): tool_store: ToolStore | None = None diff --git a/src/llama_stack/apis/vector_io/vector_io.py b/src/llama_stack/apis/vector_io/vector_io.py index 9148d10e5..26c961db3 100644 --- a/src/llama_stack/apis/vector_io/vector_io.py +++ b/src/llama_stack/apis/vector_io/vector_io.py @@ -13,10 +13,10 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable from fastapi import Body from pydantic import BaseModel, Field +from llama_stack.apis.common.tracing import telemetry_traceable from llama_stack.apis.inference import InterleavedContent from llama_stack.apis.vector_stores import VectorStore from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.core.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.strong_typing.schema import register_schema @@ -502,7 +502,7 @@ class VectorStoreTable(Protocol): @runtime_checkable -@trace_protocol +@telemetry_traceable class VectorIO(Protocol): vector_store_table: VectorStoreTable | None = None diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py index 805d260fc..8bf371fed 100644 --- a/src/llama_stack/core/resolver.py +++ b/src/llama_stack/core/resolver.py @@ -397,6 +397,18 @@ async def instantiate_provider( impl.__provider_spec__ = provider_spec impl.__provider_config__ = config + # Apply tracing if telemetry is enabled and any base class has __marked_for_tracing__ marker + if 
run_config.telemetry.enabled: + traced_classes = [ + base for base in reversed(impl.__class__.__mro__) if getattr(base, "__marked_for_tracing__", False) + ] + + if traced_classes: + from llama_stack.core.telemetry.trace_protocol import trace_protocol + + for cls in traced_classes: + trace_protocol(cls) + protocols = api_protocol_map_for_compliance_check(run_config) additional_protocols = additional_protocols_map() # TODO: check compliance for special tool groups diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py index 204cbb87f..729d1c9ea 100644 --- a/src/llama_stack/core/routers/__init__.py +++ b/src/llama_stack/core/routers/__init__.py @@ -45,6 +45,7 @@ async def get_routing_table_impl( raise ValueError(f"API {api.value} not found in router map") impl = api_to_tables[api.value](impls_by_provider_id, dist_registry, policy) + await impl.initialize() return impl @@ -92,5 +93,6 @@ async def get_auto_router_impl( api_to_dep_impl["safety_config"] = run_config.safety impl = api_to_routers[api.value](routing_table, **api_to_dep_impl) + await impl.initialize() return impl diff --git a/src/llama_stack/core/telemetry/telemetry.py b/src/llama_stack/core/telemetry/telemetry.py index 9476c961a..459c1aa1a 100644 --- a/src/llama_stack/core/telemetry/telemetry.py +++ b/src/llama_stack/core/telemetry/telemetry.py @@ -163,47 +163,6 @@ class MetricEvent(EventCommon): unit: str -@json_schema_type -class MetricInResponse(BaseModel): - """A metric value included in API responses. 
- :param metric: The name of the metric - :param value: The numeric value of the metric - :param unit: (Optional) The unit of measurement for the metric value - """ - - metric: str - value: int | float - unit: str | None = None - - -# This is a short term solution to allow inference API to return metrics -# The ideal way to do this is to have a way for all response types to include metrics -# and all metric events logged to the telemetry API to be included with the response -# To do this, we will need to augment all response types with a metrics field. -# We have hit a blocker from stainless SDK that prevents us from doing this. -# The blocker is that if we were to augment the response types that have a data field -# in them like so -# class ListModelsResponse(BaseModel): -# metrics: Optional[List[MetricEvent]] = None -# data: List[Models] -# ... -# The client SDK will need to access the data by using a .data field, which is not -# ergonomic. Stainless SDK does support unwrapping the response type, but it -# requires that the response type to only have a single field. - -# We will need a way in the client SDK to signal that the metrics are needed -# and if they are needed, the client SDK has to return the full response type -# without unwrapping it. - - -class MetricResponseMixin(BaseModel): - """Mixin class for API responses that can include metrics. - :param metrics: (Optional) List of metrics associated with the API response - """ - - metrics: list[MetricInResponse] | None = None - - @json_schema_type class StructuredLogType(Enum): """The type of structured log event payload. 
diff --git a/src/llama_stack/core/telemetry/trace_protocol.py b/src/llama_stack/core/telemetry/trace_protocol.py index 807b8e2a9..95b33a4bc 100644 --- a/src/llama_stack/core/telemetry/trace_protocol.py +++ b/src/llama_stack/core/telemetry/trace_protocol.py @@ -129,6 +129,15 @@ def trace_protocol[T: type[Any]](cls: T) -> T: else: return sync_wrapper + # Wrap methods on the class itself (for classes applied at runtime) + # Skip if already wrapped (indicated by __wrapped__ attribute) + for name, method in vars(cls).items(): + if inspect.isfunction(method) and not name.startswith("_"): + if not hasattr(method, "__wrapped__"): + wrapped = trace_method(method) + setattr(cls, name, wrapped) # noqa: B010 + + # Also set up __init_subclass__ for future subclasses original_init_subclass = cast(Callable[..., Any] | None, getattr(cls, "__init_subclass__", None)) def __init_subclass__(cls_child: type[Any], **kwargs: Any) -> None: # noqa: N807 diff --git a/src/llama_stack/core/ui/Containerfile b/src/llama_stack/core/ui/Containerfile deleted file mode 100644 index 0126d1867..000000000 --- a/src/llama_stack/core/ui/Containerfile +++ /dev/null @@ -1,11 +0,0 @@ -# More info on playground configuration can be found here: -# https://llama-stack.readthedocs.io/en/latest/playground - -FROM python:3.12-slim -WORKDIR /app -COPY . /app/ -RUN /usr/local/bin/python -m pip install --upgrade pip && \ - /usr/local/bin/pip3 install -r requirements.txt -EXPOSE 8501 - -ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"] diff --git a/src/llama_stack/core/ui/README.md b/src/llama_stack/core/ui/README.md deleted file mode 100644 index 37f1501c9..000000000 --- a/src/llama_stack/core/ui/README.md +++ /dev/null @@ -1,50 +0,0 @@ -# (Experimental) LLama Stack UI - -## Docker Setup - -:warning: This is a work in progress. - -## Developer Setup - -1. Start up Llama Stack API server. More details [here](https://llamastack.github.io/latest/getting_started/index.htmll). 
- -``` -llama stack list-deps together | xargs -L1 uv pip install - -llama stack run together -``` - -2. (Optional) Register datasets and eval tasks as resources. If you want to run pre-configured evaluation flows (e.g. Evaluations (Generation + Scoring) Page). - -```bash -llama-stack-client datasets register \ ---dataset-id "mmlu" \ ---provider-id "huggingface" \ ---url "https://huggingface.co/datasets/llamastack/evals" \ ---metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \ ---schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string", "chat_completion_input": {"type": "string"}}}' -``` - -```bash -llama-stack-client benchmarks register \ ---eval-task-id meta-reference-mmlu \ ---provider-id meta-reference \ ---dataset-id mmlu \ ---scoring-functions basic::regex_parser_multiple_choice_answer -``` - -3. Start Streamlit UI - -```bash -uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py -``` - -## Environment Variables - -| Environment Variable | Description | Default Value | -|----------------------------|------------------------------------|---------------------------| -| LLAMA_STACK_ENDPOINT | The endpoint for the Llama Stack | http://localhost:8321 | -| FIREWORKS_API_KEY | API key for Fireworks provider | (empty string) | -| TOGETHER_API_KEY | API key for Together provider | (empty string) | -| SAMBANOVA_API_KEY | API key for SambaNova provider | (empty string) | -| OPENAI_API_KEY | API key for OpenAI provider | (empty string) | diff --git a/src/llama_stack/core/ui/__init__.py b/src/llama_stack/core/ui/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/src/llama_stack/core/ui/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
diff --git a/src/llama_stack/core/ui/app.py b/src/llama_stack/core/ui/app.py deleted file mode 100644 index 441f65d20..000000000 --- a/src/llama_stack/core/ui/app.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -import streamlit as st - - -def main(): - # Evaluation pages - application_evaluation_page = st.Page( - "page/evaluations/app_eval.py", - title="Evaluations (Scoring)", - icon="📊", - default=False, - ) - native_evaluation_page = st.Page( - "page/evaluations/native_eval.py", - title="Evaluations (Generation + Scoring)", - icon="📊", - default=False, - ) - - # Playground pages - chat_page = st.Page("page/playground/chat.py", title="Chat", icon="💬", default=True) - rag_page = st.Page("page/playground/rag.py", title="RAG", icon="💬", default=False) - tool_page = st.Page("page/playground/tools.py", title="Tools", icon="🛠", default=False) - - # Distribution pages - resources_page = st.Page("page/distribution/resources.py", title="Resources", icon="🔍", default=False) - provider_page = st.Page( - "page/distribution/providers.py", - title="API Providers", - icon="🔍", - default=False, - ) - - pg = st.navigation( - { - "Playground": [ - chat_page, - rag_page, - tool_page, - application_evaluation_page, - native_evaluation_page, - ], - "Inspect": [provider_page, resources_page], - }, - expanded=False, - ) - pg.run() - - -if __name__ == "__main__": - main() diff --git a/src/llama_stack/core/ui/modules/__init__.py b/src/llama_stack/core/ui/modules/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/src/llama_stack/core/ui/modules/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/src/llama_stack/core/ui/modules/api.py b/src/llama_stack/core/ui/modules/api.py deleted file mode 100644 index 9db87b280..000000000 --- a/src/llama_stack/core/ui/modules/api.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import os - -from llama_stack_client import LlamaStackClient - - -class LlamaStackApi: - def __init__(self): - self.client = LlamaStackClient( - base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"), - provider_data={ - "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY", ""), - "together_api_key": os.environ.get("TOGETHER_API_KEY", ""), - "sambanova_api_key": os.environ.get("SAMBANOVA_API_KEY", ""), - "openai_api_key": os.environ.get("OPENAI_API_KEY", ""), - "tavily_search_api_key": os.environ.get("TAVILY_SEARCH_API_KEY", ""), - }, - ) - - def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None): - """Run scoring on a single row""" - if not scoring_params: - scoring_params = dict.fromkeys(scoring_function_ids) - return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params) - - -llama_stack_api = LlamaStackApi() diff --git a/src/llama_stack/core/ui/modules/utils.py b/src/llama_stack/core/ui/modules/utils.py deleted file mode 100644 index 67cce98fa..000000000 --- a/src/llama_stack/core/ui/modules/utils.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import base64 -import os - -import pandas as pd -import streamlit as st - - -def process_dataset(file): - if file is None: - return "No file uploaded", None - - try: - # Determine file type and read accordingly - file_ext = os.path.splitext(file.name)[1].lower() - if file_ext == ".csv": - df = pd.read_csv(file) - elif file_ext in [".xlsx", ".xls"]: - df = pd.read_excel(file) - else: - return "Unsupported file format. Please upload a CSV or Excel file.", None - - return df - - except Exception as e: - st.error(f"Error processing file: {str(e)}") - return None - - -def data_url_from_file(file) -> str: - file_content = file.getvalue() - base64_content = base64.b64encode(file_content).decode("utf-8") - mime_type = file.type - - data_url = f"data:{mime_type};base64,{base64_content}" - - return data_url diff --git a/src/llama_stack/core/ui/page/__init__.py b/src/llama_stack/core/ui/page/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/src/llama_stack/core/ui/page/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/src/llama_stack/core/ui/page/distribution/__init__.py b/src/llama_stack/core/ui/page/distribution/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/src/llama_stack/core/ui/page/distribution/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
diff --git a/src/llama_stack/core/ui/page/distribution/datasets.py b/src/llama_stack/core/ui/page/distribution/datasets.py deleted file mode 100644 index aab0901ac..000000000 --- a/src/llama_stack/core/ui/page/distribution/datasets.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def datasets(): - st.header("Datasets") - - datasets_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()} - if len(datasets_info) > 0: - selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys())) - st.json(datasets_info[selected_dataset], expanded=True) diff --git a/src/llama_stack/core/ui/page/distribution/eval_tasks.py b/src/llama_stack/core/ui/page/distribution/eval_tasks.py deleted file mode 100644 index 1a0ce502b..000000000 --- a/src/llama_stack/core/ui/page/distribution/eval_tasks.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def benchmarks(): - # Benchmarks Section - st.header("Benchmarks") - - benchmarks_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.benchmarks.list()} - - if len(benchmarks_info) > 0: - selected_benchmark = st.selectbox("Select an eval task", list(benchmarks_info.keys()), key="benchmark_inspect") - st.json(benchmarks_info[selected_benchmark], expanded=True) diff --git a/src/llama_stack/core/ui/page/distribution/models.py b/src/llama_stack/core/ui/page/distribution/models.py deleted file mode 100644 index e00b327ae..000000000 --- a/src/llama_stack/core/ui/page/distribution/models.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def models(): - # Models Section - st.header("Models") - models_info = {m.id: m.model_dump() for m in llama_stack_api.client.models.list()} - - selected_model = st.selectbox("Select a model", list(models_info.keys())) - st.json(models_info[selected_model]) diff --git a/src/llama_stack/core/ui/page/distribution/providers.py b/src/llama_stack/core/ui/page/distribution/providers.py deleted file mode 100644 index 3ec6026d1..000000000 --- a/src/llama_stack/core/ui/page/distribution/providers.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def providers(): - st.header("🔍 API Providers") - apis_providers_lst = llama_stack_api.client.providers.list() - api_to_providers = {} - for api_provider in apis_providers_lst: - if api_provider.api in api_to_providers: - api_to_providers[api_provider.api].append(api_provider) - else: - api_to_providers[api_provider.api] = [api_provider] - - for api in api_to_providers.keys(): - st.markdown(f"###### {api}") - st.dataframe([x.to_dict() for x in api_to_providers[api]], width=500) - - -providers() diff --git a/src/llama_stack/core/ui/page/distribution/resources.py b/src/llama_stack/core/ui/page/distribution/resources.py deleted file mode 100644 index 6e7122ceb..000000000 --- a/src/llama_stack/core/ui/page/distribution/resources.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from streamlit_option_menu import option_menu - -from llama_stack.core.ui.page.distribution.datasets import datasets -from llama_stack.core.ui.page.distribution.eval_tasks import benchmarks -from llama_stack.core.ui.page.distribution.models import models -from llama_stack.core.ui.page.distribution.scoring_functions import scoring_functions -from llama_stack.core.ui.page.distribution.shields import shields - - -def resources_page(): - options = [ - "Models", - "Shields", - "Scoring Functions", - "Datasets", - "Benchmarks", - ] - icons = ["magic", "shield", "file-bar-graph", "database", "list-task"] - selected_resource = option_menu( - None, - options, - icons=icons, - orientation="horizontal", - styles={ - "nav-link": { - "font-size": "12px", - }, - }, - ) - if selected_resource == "Benchmarks": - benchmarks() - elif selected_resource == "Datasets": - datasets() - elif selected_resource == "Models": - models() - elif selected_resource == "Scoring Functions": - scoring_functions() - elif selected_resource == "Shields": - shields() - - -resources_page() diff --git a/src/llama_stack/core/ui/page/distribution/scoring_functions.py b/src/llama_stack/core/ui/page/distribution/scoring_functions.py deleted file mode 100644 index 2a5196fa9..000000000 --- a/src/llama_stack/core/ui/page/distribution/scoring_functions.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def scoring_functions(): - st.header("Scoring Functions") - - scoring_functions_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.scoring_functions.list()} - - selected_scoring_function = st.selectbox("Select a scoring function", list(scoring_functions_info.keys())) - st.json(scoring_functions_info[selected_scoring_function], expanded=True) diff --git a/src/llama_stack/core/ui/page/distribution/shields.py b/src/llama_stack/core/ui/page/distribution/shields.py deleted file mode 100644 index ecce2f12b..000000000 --- a/src/llama_stack/core/ui/page/distribution/shields.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def shields(): - # Shields Section - st.header("Shields") - - shields_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.shields.list()} - - selected_shield = st.selectbox("Select a shield", list(shields_info.keys())) - st.json(shields_info[selected_shield]) diff --git a/src/llama_stack/core/ui/page/evaluations/__init__.py b/src/llama_stack/core/ui/page/evaluations/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/src/llama_stack/core/ui/page/evaluations/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
diff --git a/src/llama_stack/core/ui/page/evaluations/app_eval.py b/src/llama_stack/core/ui/page/evaluations/app_eval.py deleted file mode 100644 index 07e6349c9..000000000 --- a/src/llama_stack/core/ui/page/evaluations/app_eval.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import json - -import pandas as pd -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api -from llama_stack.core.ui.modules.utils import process_dataset - - -def application_evaluation_page(): - st.set_page_config(page_title="Evaluations (Scoring)", page_icon="🦙") - st.title("📊 Evaluations (Scoring)") - - # File uploader - uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"]) - - if uploaded_file is None: - st.error("No file uploaded") - return - - # Process uploaded file - df = process_dataset(uploaded_file) - if df is None: - st.error("Error processing file") - return - - # Display dataset information - st.success("Dataset loaded successfully!") - - # Display dataframe preview - st.subheader("Dataset Preview") - st.dataframe(df) - - # Select Scoring Functions to Run Evaluation On - st.subheader("Select Scoring Functions") - scoring_functions = llama_stack_api.client.scoring_functions.list() - scoring_functions = {sf.identifier: sf for sf in scoring_functions} - scoring_functions_names = list(scoring_functions.keys()) - selected_scoring_functions = st.multiselect( - "Choose one or more scoring functions", - options=scoring_functions_names, - help="Choose one or more scoring functions.", - ) - - available_models = llama_stack_api.client.models.list() - available_models = [m.identifier for m in available_models] - - scoring_params = {} - if selected_scoring_functions: - st.write("Selected:") - for scoring_fn_id in selected_scoring_functions: - 
scoring_fn = scoring_functions[scoring_fn_id] - st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}") - new_params = None - if scoring_fn.params: - new_params = {} - for param_name, param_value in scoring_fn.params.to_dict().items(): - if param_name == "type": - new_params[param_name] = param_value - continue - - if param_name == "judge_model": - value = st.selectbox( - f"Select **{param_name}** for {scoring_fn_id}", - options=available_models, - index=0, - key=f"{scoring_fn_id}_{param_name}", - ) - new_params[param_name] = value - else: - value = st.text_area( - f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format", - value=json.dumps(param_value, indent=2), - height=80, - ) - try: - new_params[param_name] = json.loads(value) - except json.JSONDecodeError: - st.error(f"Invalid JSON for **{param_name}** in {scoring_fn_id}") - - st.json(new_params) - scoring_params[scoring_fn_id] = new_params - - # Add run evaluation button & slider - total_rows = len(df) - num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows) - - if st.button("Run Evaluation"): - progress_text = "Running evaluation..." 
- progress_bar = st.progress(0, text=progress_text) - rows = df.to_dict(orient="records") - if num_rows < total_rows: - rows = rows[:num_rows] - - # Create separate containers for progress text and results - progress_text_container = st.empty() - results_container = st.empty() - output_res = {} - for i, r in enumerate(rows): - # Update progress - progress = i / len(rows) - progress_bar.progress(progress, text=progress_text) - - # Run evaluation for current row - score_res = llama_stack_api.run_scoring( - r, - scoring_function_ids=selected_scoring_functions, - scoring_params=scoring_params, - ) - - for k in r.keys(): - if k not in output_res: - output_res[k] = [] - output_res[k].append(r[k]) - - for fn_id in selected_scoring_functions: - if fn_id not in output_res: - output_res[fn_id] = [] - output_res[fn_id].append(score_res.results[fn_id].score_rows[0]) - - # Display current row results using separate containers - progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})") - results_container.json( - score_res.to_json(), - expanded=2, - ) - - progress_bar.progress(1.0, text="Evaluation complete!") - - # Display results in dataframe - if output_res: - output_df = pd.DataFrame(output_res) - st.subheader("Evaluation Results") - st.dataframe(output_df) - - -application_evaluation_page() diff --git a/src/llama_stack/core/ui/page/evaluations/native_eval.py b/src/llama_stack/core/ui/page/evaluations/native_eval.py deleted file mode 100644 index 2bef63b2f..000000000 --- a/src/llama_stack/core/ui/page/evaluations/native_eval.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import json - -import pandas as pd -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def select_benchmark_1(): - # Select Benchmarks - st.subheader("1. Choose An Eval Task") - benchmarks = llama_stack_api.client.benchmarks.list() - benchmarks = {et.identifier: et for et in benchmarks} - benchmarks_names = list(benchmarks.keys()) - selected_benchmark = st.selectbox( - "Choose an eval task.", - options=benchmarks_names, - help="Choose an eval task. Each eval task is parameterized by a dataset, and list of scoring functions.", - ) - with st.expander("View Eval Task"): - st.json(benchmarks[selected_benchmark], expanded=True) - - st.session_state["selected_benchmark"] = selected_benchmark - st.session_state["benchmarks"] = benchmarks - if st.button("Confirm", key="confirm_1"): - st.session_state["selected_benchmark_1_next"] = True - - -def define_eval_candidate_2(): - if not st.session_state.get("selected_benchmark_1_next", None): - return - - st.subheader("2. Define Eval Candidate") - st.info( - """ - Define the configurations for the evaluation candidate model or agent used for generation. - Select "model" if you want to run generation with inference API, or "agent" if you want to run generation with agent API through specifying AgentConfig. - """ - ) - with st.expander("Define Eval Candidate", expanded=True): - # Define Eval Candidate - candidate_type = st.radio("Candidate Type", ["model", "agent"]) - - available_models = llama_stack_api.client.models.list() - available_models = [model.identifier for model in available_models] - selected_model = st.selectbox( - "Choose a model", - available_models, - index=0, - ) - - # Sampling Parameters - st.markdown("##### Sampling Parameters") - temperature = st.slider( - "Temperature", - min_value=0.0, - max_value=1.0, - value=0.0, - step=0.1, - help="Controls the randomness of the response. 
Higher values make the output more creative and unexpected, lower values make it more conservative and predictable", - ) - top_p = st.slider( - "Top P", - min_value=0.0, - max_value=1.0, - value=0.95, - step=0.1, - ) - max_tokens = st.slider( - "Max Tokens", - min_value=0, - max_value=4096, - value=512, - step=1, - help="The maximum number of tokens to generate", - ) - repetition_penalty = st.slider( - "Repetition Penalty", - min_value=1.0, - max_value=2.0, - value=1.0, - step=0.1, - help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.", - ) - if candidate_type == "model": - if temperature > 0.0: - strategy = { - "type": "top_p", - "temperature": temperature, - "top_p": top_p, - } - else: - strategy = {"type": "greedy"} - - eval_candidate = { - "type": "model", - "model": selected_model, - "sampling_params": { - "strategy": strategy, - "max_tokens": max_tokens, - "repetition_penalty": repetition_penalty, - }, - } - elif candidate_type == "agent": - system_prompt = st.text_area( - "System Prompt", - value="You are a helpful AI assistant.", - help="Initial instructions given to the AI to set its behavior and context", - ) - tools_json = st.text_area( - "Tools Configuration (JSON)", - value=json.dumps( - [ - { - "type": "brave_search", - "engine": "brave", - "api_key": "ENTER_BRAVE_API_KEY_HERE", - } - ] - ), - help="Enter tool configurations in JSON format. 
Each tool should have a name, description, and parameters.", - height=200, - ) - try: - tools = json.loads(tools_json) - except json.JSONDecodeError: - st.error("Invalid JSON format for tools configuration") - tools = [] - eval_candidate = { - "type": "agent", - "config": { - "model": selected_model, - "instructions": system_prompt, - "tools": tools, - "tool_choice": "auto", - "tool_prompt_format": "json", - "input_shields": [], - "output_shields": [], - "enable_session_persistence": False, - }, - } - st.session_state["eval_candidate"] = eval_candidate - - if st.button("Confirm", key="confirm_2"): - st.session_state["selected_eval_candidate_2_next"] = True - - -def run_evaluation_3(): - if not st.session_state.get("selected_eval_candidate_2_next", None): - return - - st.subheader("3. Run Evaluation") - # Add info box to explain configurations being used - st.info( - """ - Review the configurations that will be used for this evaluation run, make any necessary changes, and then click the "Run Evaluation" button. - """ - ) - selected_benchmark = st.session_state["selected_benchmark"] - benchmarks = st.session_state["benchmarks"] - eval_candidate = st.session_state["eval_candidate"] - - dataset_id = benchmarks[selected_benchmark].dataset_id - rows = llama_stack_api.client.datasets.iterrows( - dataset_id=dataset_id, - ) - total_rows = len(rows.data) - # Add number of examples control - num_rows = st.number_input( - "Number of Examples to Evaluate", - min_value=1, - max_value=total_rows, - value=5, - help="Number of examples from the dataset to evaluate. 
", - ) - - benchmark_config = { - "type": "benchmark", - "eval_candidate": eval_candidate, - "scoring_params": {}, - } - - with st.expander("View Evaluation Task", expanded=True): - st.json(benchmarks[selected_benchmark], expanded=True) - with st.expander("View Evaluation Task Configuration", expanded=True): - st.json(benchmark_config, expanded=True) - - # Add run button and handle evaluation - if st.button("Run Evaluation"): - progress_text = "Running evaluation..." - progress_bar = st.progress(0, text=progress_text) - rows = rows.data - if num_rows < total_rows: - rows = rows[:num_rows] - - # Create separate containers for progress text and results - progress_text_container = st.empty() - results_container = st.empty() - output_res = {} - for i, r in enumerate(rows): - # Update progress - progress = i / len(rows) - progress_bar.progress(progress, text=progress_text) - # Run evaluation for current row - eval_res = llama_stack_api.client.eval.evaluate_rows( - benchmark_id=selected_benchmark, - input_rows=[r], - scoring_functions=benchmarks[selected_benchmark].scoring_functions, - benchmark_config=benchmark_config, - ) - - for k in r.keys(): - if k not in output_res: - output_res[k] = [] - output_res[k].append(r[k]) - - for k in eval_res.generations[0].keys(): - if k not in output_res: - output_res[k] = [] - output_res[k].append(eval_res.generations[0][k]) - - for scoring_fn in benchmarks[selected_benchmark].scoring_functions: - if scoring_fn not in output_res: - output_res[scoring_fn] = [] - output_res[scoring_fn].append(eval_res.scores[scoring_fn].score_rows[0]) - - progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})") - results_container.json(eval_res, expanded=2) - - progress_bar.progress(1.0, text="Evaluation complete!") - # Display results in dataframe - if output_res: - output_df = pd.DataFrame(output_res) - st.subheader("Evaluation Results") - st.dataframe(output_df) - - -def native_evaluation_page(): - 
st.set_page_config(page_title="Evaluations (Generation + Scoring)", page_icon="🦙") - st.title("📊 Evaluations (Generation + Scoring)") - - select_benchmark_1() - define_eval_candidate_2() - run_evaluation_3() - - -native_evaluation_page() diff --git a/src/llama_stack/core/ui/page/playground/__init__.py b/src/llama_stack/core/ui/page/playground/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/src/llama_stack/core/ui/page/playground/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/src/llama_stack/core/ui/page/playground/chat.py b/src/llama_stack/core/ui/page/playground/chat.py deleted file mode 100644 index c813f05dc..000000000 --- a/src/llama_stack/core/ui/page/playground/chat.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - -# Sidebar configurations -with st.sidebar: - st.header("Configuration") - available_models = llama_stack_api.client.models.list() - available_models = [ - model.id - for model in available_models - if model.custom_metadata and model.custom_metadata.get("model_type") == "llm" - ] - selected_model = st.selectbox( - "Choose a model", - available_models, - index=0, - ) - - temperature = st.slider( - "Temperature", - min_value=0.0, - max_value=1.0, - value=0.0, - step=0.1, - help="Controls the randomness of the response. 
Higher values make the output more creative and unexpected, lower values make it more conservative and predictable", - ) - - top_p = st.slider( - "Top P", - min_value=0.0, - max_value=1.0, - value=0.95, - step=0.1, - ) - - max_tokens = st.slider( - "Max Tokens", - min_value=0, - max_value=4096, - value=512, - step=1, - help="The maximum number of tokens to generate", - ) - - repetition_penalty = st.slider( - "Repetition Penalty", - min_value=1.0, - max_value=2.0, - value=1.0, - step=0.1, - help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.", - ) - - stream = st.checkbox("Stream", value=True) - system_prompt = st.text_area( - "System Prompt", - value="You are a helpful AI assistant.", - help="Initial instructions given to the AI to set its behavior and context", - ) - - # Add clear chat button to sidebar - if st.button("Clear Chat", use_container_width=True): - st.session_state.messages = [] - st.rerun() - - -# Main chat interface -st.title("🦙 Chat") - - -# Initialize chat history -if "messages" not in st.session_state: - st.session_state.messages = [] - -# Display chat messages -for message in st.session_state.messages: - with st.chat_message(message["role"]): - st.markdown(message["content"]) - -# Chat input -if prompt := st.chat_input("Example: What is Llama Stack?"): - # Add user message to chat history - st.session_state.messages.append({"role": "user", "content": prompt}) - - # Display user message - with st.chat_message("user"): - st.markdown(prompt) - - # Display assistant response - with st.chat_message("assistant"): - message_placeholder = st.empty() - full_response = "" - - if temperature > 0.0: - strategy = { - "type": "top_p", - "temperature": temperature, - "top_p": top_p, - } - else: - strategy = {"type": "greedy"} - - response = llama_stack_api.client.inference.chat_completion( - messages=[ - {"role": 
"system", "content": system_prompt}, - {"role": "user", "content": prompt}, - ], - model_id=selected_model, - stream=stream, - sampling_params={ - "strategy": strategy, - "max_tokens": max_tokens, - "repetition_penalty": repetition_penalty, - }, - ) - - if stream: - for chunk in response: - if chunk.event.event_type == "progress": - full_response += chunk.event.delta.text - message_placeholder.markdown(full_response + "▌") - message_placeholder.markdown(full_response) - else: - full_response = response.completion_message.content - message_placeholder.markdown(full_response) - - st.session_state.messages.append({"role": "assistant", "content": full_response}) diff --git a/src/llama_stack/core/ui/page/playground/tools.py b/src/llama_stack/core/ui/page/playground/tools.py deleted file mode 100644 index 16fd464ee..000000000 --- a/src/llama_stack/core/ui/page/playground/tools.py +++ /dev/null @@ -1,352 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import enum -import json -import uuid - -import streamlit as st -from llama_stack_client import Agent -from llama_stack_client.lib.agents.react.agent import ReActAgent -from llama_stack_client.lib.agents.react.tool_parser import ReActOutput - -from llama_stack.core.ui.modules.api import llama_stack_api - - -class AgentType(enum.Enum): - REGULAR = "Regular" - REACT = "ReAct" - - -def tool_chat_page(): - st.title("🛠 Tools") - - client = llama_stack_api.client - models = client.models.list() - model_list = [model.identifier for model in models if model.api_model_type == "llm"] - - tool_groups = client.toolgroups.list() - tool_groups_list = [tool_group.identifier for tool_group in tool_groups] - mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")] - builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")] - selected_vector_stores = [] - - def reset_agent(): - st.session_state.clear() - st.cache_resource.clear() - - with st.sidebar: - st.title("Configuration") - st.subheader("Model") - model = st.selectbox(label="Model", options=model_list, on_change=reset_agent, label_visibility="collapsed") - - st.subheader("Available ToolGroups") - - toolgroup_selection = st.pills( - label="Built-in tools", - options=builtin_tools_list, - selection_mode="multi", - on_change=reset_agent, - format_func=lambda tool: "".join(tool.split("::")[1:]), - help="List of built-in tools from your llama stack server.", - ) - - if "builtin::rag" in toolgroup_selection: - vector_stores = llama_stack_api.client.vector_stores.list() or [] - if not vector_stores: - st.info("No vector databases available for selection.") - vector_stores = [vector_store.identifier for vector_store in vector_stores] - selected_vector_stores = st.multiselect( - label="Select Document Collections to use in RAG queries", - options=vector_stores, - on_change=reset_agent, - ) - - mcp_selection = st.pills( - label="MCP Servers", - options=mcp_tools_list, - 
selection_mode="multi", - on_change=reset_agent, - format_func=lambda tool: "".join(tool.split("::")[1:]), - help="List of MCP servers registered to your llama stack server.", - ) - - toolgroup_selection.extend(mcp_selection) - - grouped_tools = {} - total_tools = 0 - - for toolgroup_id in toolgroup_selection: - tools = client.tools.list(toolgroup_id=toolgroup_id) - grouped_tools[toolgroup_id] = [tool.name for tool in tools] - total_tools += len(tools) - - st.markdown(f"Active Tools: 🛠 {total_tools}") - - for group_id, tools in grouped_tools.items(): - with st.expander(f"🔧 Tools from `{group_id}`"): - for idx, tool in enumerate(tools, start=1): - st.markdown(f"{idx}. `{tool.split(':')[-1]}`") - - st.subheader("Agent Configurations") - st.subheader("Agent Type") - agent_type = st.radio( - label="Select Agent Type", - options=["Regular", "ReAct"], - on_change=reset_agent, - ) - - if agent_type == "ReAct": - agent_type = AgentType.REACT - else: - agent_type = AgentType.REGULAR - - max_tokens = st.slider( - "Max Tokens", - min_value=0, - max_value=4096, - value=512, - step=64, - help="The maximum number of tokens to generate", - on_change=reset_agent, - ) - - for i, tool_name in enumerate(toolgroup_selection): - if tool_name == "builtin::rag": - tool_dict = dict( - name="builtin::rag", - args={ - "vector_store_ids": list(selected_vector_stores), - }, - ) - toolgroup_selection[i] = tool_dict - - @st.cache_resource - def create_agent(): - if "agent_type" in st.session_state and st.session_state.agent_type == AgentType.REACT: - return ReActAgent( - client=client, - model=model, - tools=toolgroup_selection, - response_format={ - "type": "json_schema", - "json_schema": ReActOutput.model_json_schema(), - }, - sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens}, - ) - else: - return Agent( - client, - model=model, - instructions="You are a helpful assistant. 
When you use a tool always respond with a summary of the result.", - tools=toolgroup_selection, - sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens}, - ) - - st.session_state.agent_type = agent_type - - agent = create_agent() - - if "agent_session_id" not in st.session_state: - st.session_state["agent_session_id"] = agent.create_session(session_name=f"tool_demo_{uuid.uuid4()}") - - session_id = st.session_state["agent_session_id"] - - if "messages" not in st.session_state: - st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}] - - for msg in st.session_state.messages: - with st.chat_message(msg["role"]): - st.markdown(msg["content"]) - - if prompt := st.chat_input(placeholder=""): - with st.chat_message("user"): - st.markdown(prompt) - - st.session_state.messages.append({"role": "user", "content": prompt}) - - turn_response = agent.create_turn( - session_id=session_id, - messages=[{"role": "user", "content": prompt}], - stream=True, - ) - - def response_generator(turn_response): - if st.session_state.get("agent_type") == AgentType.REACT: - return _handle_react_response(turn_response) - else: - return _handle_regular_response(turn_response) - - def _handle_react_response(turn_response): - current_step_content = "" - final_answer = None - tool_results = [] - - for response in turn_response: - if not hasattr(response.event, "payload"): - yield ( - "\n\n🚨 :red[_Llama Stack server Error:_]\n" - "The response received is missing an expected `payload` attribute.\n" - "This could indicate a malformed response or an internal issue within the server.\n\n" - f"Error details: {response}" - ) - return - - payload = response.event.payload - - if payload.event_type == "step_progress" and hasattr(payload.delta, "text"): - current_step_content += payload.delta.text - continue - - if payload.event_type == "step_complete": - step_details = payload.step_details - - if step_details.step_type == "inference": - yield from 
_process_inference_step(current_step_content, tool_results, final_answer) - current_step_content = "" - elif step_details.step_type == "tool_execution": - tool_results = _process_tool_execution(step_details, tool_results) - current_step_content = "" - else: - current_step_content = "" - - if not final_answer and tool_results: - yield from _format_tool_results_summary(tool_results) - - def _process_inference_step(current_step_content, tool_results, final_answer): - try: - react_output_data = json.loads(current_step_content) - thought = react_output_data.get("thought") - action = react_output_data.get("action") - answer = react_output_data.get("answer") - - if answer and answer != "null" and answer is not None: - final_answer = answer - - if thought: - with st.expander("🤔 Thinking...", expanded=False): - st.markdown(f":grey[__{thought}__]") - - if action and isinstance(action, dict): - tool_name = action.get("tool_name") - tool_params = action.get("tool_params") - with st.expander(f'🛠 Action: Using tool "{tool_name}"', expanded=False): - st.json(tool_params) - - if answer and answer != "null" and answer is not None: - yield f"\n\n✅ **Final Answer:**\n{answer}" - - except json.JSONDecodeError: - yield f"\n\nFailed to parse ReAct step content:\n```json\n{current_step_content}\n```" - except Exception as e: - yield f"\n\nFailed to process ReAct step: {e}\n```json\n{current_step_content}\n```" - - return final_answer - - def _process_tool_execution(step_details, tool_results): - try: - if hasattr(step_details, "tool_responses") and step_details.tool_responses: - for tool_response in step_details.tool_responses: - tool_name = tool_response.tool_name - content = tool_response.content - tool_results.append((tool_name, content)) - with st.expander(f'⚙️ Observation (Result from "{tool_name}")', expanded=False): - try: - parsed_content = json.loads(content) - st.json(parsed_content) - except json.JSONDecodeError: - st.code(content, language=None) - else: - with st.expander("⚙️ 
Observation", expanded=False): - st.markdown(":grey[_Tool execution step completed, but no response data found._]") - except Exception as e: - with st.expander("⚙️ Error in Tool Execution", expanded=False): - st.markdown(f":red[_Error processing tool execution: {str(e)}_]") - - return tool_results - - def _format_tool_results_summary(tool_results): - yield "\n\n**Here's what I found:**\n" - for tool_name, content in tool_results: - try: - parsed_content = json.loads(content) - - if tool_name == "web_search" and "top_k" in parsed_content: - yield from _format_web_search_results(parsed_content) - elif "results" in parsed_content and isinstance(parsed_content["results"], list): - yield from _format_results_list(parsed_content["results"]) - elif isinstance(parsed_content, dict) and len(parsed_content) > 0: - yield from _format_dict_results(parsed_content) - elif isinstance(parsed_content, list) and len(parsed_content) > 0: - yield from _format_list_results(parsed_content) - except json.JSONDecodeError: - yield f"\n**{tool_name}** was used but returned complex data. 
Check the observation for details.\n" - except (TypeError, AttributeError, KeyError, IndexError) as e: - print(f"Error processing {tool_name} result: {type(e).__name__}: {e}") - - def _format_web_search_results(parsed_content): - for i, result in enumerate(parsed_content["top_k"], 1): - if i <= 3: - title = result.get("title", "Untitled") - url = result.get("url", "") - content_text = result.get("content", "").strip() - yield f"\n- **{title}**\n {content_text}\n [Source]({url})\n" - - def _format_results_list(results): - for i, result in enumerate(results, 1): - if i <= 3: - if isinstance(result, dict): - name = result.get("name", result.get("title", "Result " + str(i))) - description = result.get("description", result.get("content", result.get("summary", ""))) - yield f"\n- **{name}**\n {description}\n" - else: - yield f"\n- {result}\n" - - def _format_dict_results(parsed_content): - yield "\n```\n" - for key, value in list(parsed_content.items())[:5]: - if isinstance(value, str) and len(value) < 100: - yield f"{key}: {value}\n" - else: - yield f"{key}: [Complex data]\n" - yield "```\n" - - def _format_list_results(parsed_content): - yield "\n" - for _, item in enumerate(parsed_content[:3], 1): - if isinstance(item, str): - yield f"- {item}\n" - elif isinstance(item, dict) and "text" in item: - yield f"- {item['text']}\n" - elif isinstance(item, dict) and len(item) > 0: - first_value = next(iter(item.values())) - if isinstance(first_value, str) and len(first_value) < 100: - yield f"- {first_value}\n" - - def _handle_regular_response(turn_response): - for response in turn_response: - if hasattr(response.event, "payload"): - print(response.event.payload) - if response.event.payload.event_type == "step_progress": - if hasattr(response.event.payload.delta, "text"): - yield response.event.payload.delta.text - if response.event.payload.event_type == "step_complete": - if response.event.payload.step_details.step_type == "tool_execution": - if 
response.event.payload.step_details.tool_calls: - tool_name = str(response.event.payload.step_details.tool_calls[0].tool_name) - yield f'\n\n🛠 :grey[_Using "{tool_name}" tool:_]\n\n' - else: - yield "No tool_calls present in step_details" - else: - yield f"Error occurred in the Llama Stack Cluster: {response}" - - with st.chat_message("assistant"): - response_content = st.write_stream(response_generator(turn_response)) - - st.session_state.messages.append({"role": "assistant", "content": response_content}) - - -tool_chat_page() diff --git a/src/llama_stack/core/ui/requirements.txt b/src/llama_stack/core/ui/requirements.txt deleted file mode 100644 index 53a1e7bf3..000000000 --- a/src/llama_stack/core/ui/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -llama-stack>=0.2.1 -llama-stack-client>=0.2.1 -pandas -streamlit -streamlit-option-menu diff --git a/src/llama_stack/core/utils/config_resolution.py b/src/llama_stack/core/utils/config_resolution.py index fcf057db6..2a85837b6 100644 --- a/src/llama_stack/core/utils/config_resolution.py +++ b/src/llama_stack/core/utils/config_resolution.py @@ -52,7 +52,17 @@ def resolve_config_or_distro( logger.debug(f"Using distribution: {distro_config}") return distro_config - # Strategy 3: Try as built distribution name + # Strategy 3: Try as distro config path (if no .yaml extension and contains a slash) + # eg: starter::run-with-postgres-store.yaml + # Use :: to avoid slash and confusion with a filesystem path + if "::" in config_or_distro: + distro_name, config_name = config_or_distro.split("::") + distro_config = _get_distro_config_path(distro_name, config_name) + if distro_config.exists(): + logger.info(f"Using distribution: {distro_config}") + return distro_config + + # Strategy 4: Try as built distribution name distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml" if distrib_config.exists(): logger.debug(f"Using built distribution: {distrib_config}") @@ -63,13 +73,15 @@ def 
resolve_config_or_distro( logger.debug(f"Using built distribution: {distrib_config}") return distrib_config - # Strategy 4: Failed - provide helpful error + # Strategy 5: Failed - provide helpful error raise ValueError(_format_resolution_error(config_or_distro, mode)) -def _get_distro_config_path(distro_name: str, mode: Mode) -> Path: +def _get_distro_config_path(distro_name: str, mode: str) -> Path: """Get the config file path for a distro.""" - return DISTRO_DIR / distro_name / f"{mode}.yaml" + if not mode.endswith(".yaml"): + mode = f"{mode}.yaml" + return DISTRO_DIR / distro_name / mode def _format_resolution_error(config_or_distro: str, mode: Mode) -> str: diff --git a/src/llama_stack/core/utils/exec.py b/src/llama_stack/core/utils/exec.py index 12fb82d01..98964db2c 100644 --- a/src/llama_stack/core/utils/exec.py +++ b/src/llama_stack/core/utils/exec.py @@ -84,6 +84,15 @@ def run_command(command: list[str]) -> int: text=True, check=False, ) + + # Print stdout and stderr if command failed + if result.returncode != 0: + log.error(f"Command {' '.join(command)} failed with returncode {result.returncode}") + if result.stdout: + log.error(f"STDOUT: {result.stdout}") + if result.stderr: + log.error(f"STDERR: {result.stderr}") + return result.returncode except subprocess.SubprocessError as e: log.error(f"Subprocess error: {e}") diff --git a/src/llama_stack/distributions/ci-tests/build.yaml b/src/llama_stack/distributions/ci-tests/build.yaml index c01e415a9..f29ac7712 100644 --- a/src/llama_stack/distributions/ci-tests/build.yaml +++ b/src/llama_stack/distributions/ci-tests/build.yaml @@ -56,4 +56,5 @@ image_type: venv additional_pip_packages: - aiosqlite - asyncpg +- psycopg2-binary - sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/ci-tests/ci_tests.py b/src/llama_stack/distributions/ci-tests/ci_tests.py index ab102f5f3..c06b1b98d 100644 --- a/src/llama_stack/distributions/ci-tests/ci_tests.py +++ b/src/llama_stack/distributions/ci-tests/ci_tests.py @@ 
-13,5 +13,6 @@ from ..starter.starter import get_distribution_template as get_starter_distribut def get_distribution_template() -> DistributionTemplate: template = get_starter_distribution_template(name="ci-tests") template.description = "CI tests for Llama Stack" + template.run_configs.pop("run-with-postgres-store.yaml", None) return template diff --git a/src/llama_stack/distributions/postgres-demo/__init__.py b/src/llama_stack/distributions/postgres-demo/__init__.py deleted file mode 100644 index 81473cb73..000000000 --- a/src/llama_stack/distributions/postgres-demo/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .postgres_demo import get_distribution_template # noqa: F401 diff --git a/src/llama_stack/distributions/postgres-demo/build.yaml b/src/llama_stack/distributions/postgres-demo/build.yaml deleted file mode 100644 index 063dc3999..000000000 --- a/src/llama_stack/distributions/postgres-demo/build.yaml +++ /dev/null @@ -1,23 +0,0 @@ -version: 2 -distribution_spec: - description: Quick start template for running Llama Stack with several popular providers - providers: - inference: - - provider_type: remote::vllm - - provider_type: inline::sentence-transformers - vector_io: - - provider_type: remote::chromadb - safety: - - provider_type: inline::llama-guard - agents: - - provider_type: inline::meta-reference - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol -image_type: venv -additional_pip_packages: -- asyncpg -- psycopg2-binary -- sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/postgres-demo/postgres_demo.py b/src/llama_stack/distributions/postgres-demo/postgres_demo.py deleted file mode 100644 index 
876370ef3..000000000 --- a/src/llama_stack/distributions/postgres-demo/postgres_demo.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from llama_stack.apis.models import ModelType -from llama_stack.core.datatypes import ( - BuildProvider, - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.distributions.template import ( - DistributionTemplate, - RunConfigSettings, -) -from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig -from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig -from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig -from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig -from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig - - -def get_distribution_template() -> DistributionTemplate: - inference_providers = [ - Provider( - provider_id="vllm-inference", - provider_type="remote::vllm", - config=VLLMInferenceAdapterConfig.sample_run_config( - url="${env.VLLM_URL:=http://localhost:8000/v1}", - ), - ), - ] - providers = { - "inference": [ - BuildProvider(provider_type="remote::vllm"), - BuildProvider(provider_type="inline::sentence-transformers"), - ], - "vector_io": [BuildProvider(provider_type="remote::chromadb")], - "safety": [BuildProvider(provider_type="inline::llama-guard")], - "agents": [BuildProvider(provider_type="inline::meta-reference")], - "tool_runtime": [ - BuildProvider(provider_type="remote::brave-search"), - BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), - BuildProvider(provider_type="remote::model-context-protocol"), - ], - } - name = "postgres-demo" - - vector_io_providers = [ - Provider( - 
provider_id="${env.ENABLE_CHROMADB:+chromadb}", - provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}", - url="${env.CHROMADB_URL:=}", - ), - ), - ] - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - - default_models = [ - ModelInput( - model_id="${env.INFERENCE_MODEL}", - provider_id="vllm-inference", - ) - ] - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - embedding_model = ModelInput( - model_id="nomic-embed-text-v1.5", - provider_id=embedding_provider.provider_id, - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 768, - }, - ) - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Quick start template for running Llama Stack with several popular providers", - container_image=None, - template_path=None, - providers=providers, - available_models_by_provider={}, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": inference_providers + [embedding_provider], - "vector_io": vector_io_providers, - }, - default_models=default_models + [embedding_model], - default_tool_groups=default_tool_groups, - default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], - storage_backends={ - "kv_default": PostgresKVStoreConfig.sample_run_config( - table_name="llamastack_kvstore", - ), - "sql_default": PostgresSqlStoreConfig.sample_run_config(), - }, - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - }, - ) diff --git a/src/llama_stack/distributions/starter-gpu/build.yaml b/src/llama_stack/distributions/starter-gpu/build.yaml index b2e2a0c85..10cbb1389 
100644 --- a/src/llama_stack/distributions/starter-gpu/build.yaml +++ b/src/llama_stack/distributions/starter-gpu/build.yaml @@ -57,4 +57,5 @@ image_type: venv additional_pip_packages: - aiosqlite - asyncpg +- psycopg2-binary - sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml new file mode 100644 index 000000000..6dbbc8716 --- /dev/null +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -0,0 +1,281 @@ +version: 2 +image_name: starter-gpu +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ${env.OLLAMA_URL:+ollama} + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + - provider_id: ${env.VLLM_URL:+vllm} + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.TGI_URL:+tgi} + provider_type: remote::tgi + config: + url: ${env.TGI_URL:=} + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: together + provider_type: remote::together + config: + url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: bedrock + provider_type: remote::bedrock + - provider_id: ${env.NVIDIA_API_KEY:+nvidia} + provider_type: remote::nvidia + config: + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:=} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} 
+ - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} + - provider_id: anthropic + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=} + - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} + - provider_id: groq + provider_type: remote::groq + config: + url: https://api.groq.com + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova + provider_type: remote::sambanova + config: + url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY:=} + - provider_id: ${env.AZURE_API_KEY:+azure} + provider_type: remote::azure + config: + api_key: ${env.AZURE_API_KEY:=} + api_base: ${env.AZURE_API_BASE:=} + api_version: ${env.AZURE_API_VERSION:=} + api_type: ${env.AZURE_API_TYPE:=} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default + - provider_id: ${env.MILVUS_URL:+milvus} + provider_type: inline::milvus + config: + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db + persistence: + namespace: vector_io::milvus + backend: kv_default + - provider_id: ${env.CHROMADB_URL:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + - provider_id: ${env.PGVECTOR_DB:+pgvector} + 
provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + - provider_id: code-scanner + provider_type: inline::code-scanner + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + responses_store: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + post_training: + - provider_id: huggingface-gpu + provider_type: inline::huggingface-gpu + config: + checkpoint_format: huggingface + distributed_backend: null + device: cpu + dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output + eval: + - 
provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_postgres +storage: + backends: + kv_postgres: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_postgres: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + namespace: registry + backend: kv_postgres + inference: + table_name: inference_store + backend: sql_postgres + max_write_queue_size: 10000 + num_writers: 4 + 
conversations: + table_name: openai_conversations + backend: sql_postgres + prompts: + namespace: prompts + backend: kv_postgres +registered_resources: + models: [] + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: [] +server: + port: 8321 +telemetry: + enabled: true diff --git a/src/llama_stack/distributions/starter/build.yaml b/src/llama_stack/distributions/starter/build.yaml index baa80ef3e..acd51f773 100644 --- a/src/llama_stack/distributions/starter/build.yaml +++ b/src/llama_stack/distributions/starter/build.yaml @@ -57,4 +57,5 @@ image_type: venv additional_pip_packages: - aiosqlite - asyncpg +- psycopg2-binary - sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml new file mode 100644 index 000000000..530084bd9 --- /dev/null +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -0,0 +1,278 @@ +version: 2 +image_name: starter +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ${env.OLLAMA_URL:+ollama} + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + - provider_id: ${env.VLLM_URL:+vllm} + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.TGI_URL:+tgi} + provider_type: remote::tgi + config: + url: ${env.TGI_URL:=} + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: 
together + provider_type: remote::together + config: + url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: bedrock + provider_type: remote::bedrock + - provider_id: ${env.NVIDIA_API_KEY:+nvidia} + provider_type: remote::nvidia + config: + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:=} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} + - provider_id: anthropic + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=} + - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} + - provider_id: groq + provider_type: remote::groq + config: + url: https://api.groq.com + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova + provider_type: remote::sambanova + config: + url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY:=} + - provider_id: ${env.AZURE_API_KEY:+azure} + provider_type: remote::azure + config: + api_key: ${env.AZURE_API_KEY:=} + api_base: ${env.AZURE_API_BASE:=} + api_version: ${env.AZURE_API_VERSION:=} + api_type: ${env.AZURE_API_TYPE:=} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default + - provider_id: ${env.MILVUS_URL:+milvus} + 
provider_type: inline::milvus + config: + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db + persistence: + namespace: vector_io::milvus + backend: kv_default + - provider_id: ${env.CHROMADB_URL:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + - provider_id: ${env.PGVECTOR_DB:+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + - provider_id: code-scanner + provider_type: inline::code-scanner + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + responses_store: + type: sql_postgres + host: 
${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + post_training: + - provider_id: torchtune-cpu + provider_type: inline::torchtune-cpu + config: + checkpoint_format: meta + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_postgres +storage: + backends: + kv_postgres: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_postgres: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: 
${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + namespace: registry + backend: kv_postgres + inference: + table_name: inference_store + backend: sql_postgres + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_postgres + prompts: + namespace: prompts + backend: kv_postgres +registered_resources: + models: [] + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: [] +server: + port: 8321 +telemetry: + enabled: true diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py index 49b7a2463..88cd3a4fe 100644 --- a/src/llama_stack/distributions/starter/starter.py +++ b/src/llama_stack/distributions/starter/starter.py @@ -17,6 +17,11 @@ from llama_stack.core.datatypes import ( ToolGroupInput, VectorStoresConfig, ) +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + SqlStoreReference, +) from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings from llama_stack.providers.datatypes import RemoteProviderSpec @@ -36,6 +41,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import ( ) from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig +from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig @@ -181,6 +187,62 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: provider_shield_id="${env.CODE_SCANNER_MODEL:=}", ), ] + postgres_config = PostgresSqlStoreConfig.sample_run_config() + 
default_overrides = { + "inference": remote_inference_providers + [embedding_provider], + "vector_io": [ + Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ), + Provider( + provider_id="sqlite-vec", + provider_type="inline::sqlite-vec", + config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ), + Provider( + provider_id="${env.MILVUS_URL:+milvus}", + provider_type="inline::milvus", + config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ), + Provider( + provider_id="${env.CHROMADB_URL:+chromadb}", + provider_type="remote::chromadb", + config=ChromaVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}/", + url="${env.CHROMADB_URL:=}", + ), + ), + Provider( + provider_id="${env.PGVECTOR_DB:+pgvector}", + provider_type="remote::pgvector", + config=PGVectorVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}", + db="${env.PGVECTOR_DB:=}", + user="${env.PGVECTOR_USER:=}", + password="${env.PGVECTOR_PASSWORD:=}", + ), + ), + Provider( + provider_id="${env.QDRANT_URL:+qdrant}", + provider_type="remote::qdrant", + config=QdrantVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}", + url="${env.QDRANT_URL:=}", + ), + ), + Provider( + provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}", + provider_type="remote::weaviate", + config=WeaviateVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}", + cluster_url="${env.WEAVIATE_CLUSTER_URL:=}", + ), + ), + ], + "files": [files_provider], + } return DistributionTemplate( name=name, @@ -189,64 +251,10 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - additional_pip_packages=PostgresSqlStoreConfig.pip_packages(), + additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())), 
run_configs={ "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": remote_inference_providers + [embedding_provider], - "vector_io": [ - Provider( - provider_id="faiss", - provider_type="inline::faiss", - config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ), - Provider( - provider_id="sqlite-vec", - provider_type="inline::sqlite-vec", - config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ), - Provider( - provider_id="${env.MILVUS_URL:+milvus}", - provider_type="inline::milvus", - config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ), - Provider( - provider_id="${env.CHROMADB_URL:+chromadb}", - provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}/", - url="${env.CHROMADB_URL:=}", - ), - ), - Provider( - provider_id="${env.PGVECTOR_DB:+pgvector}", - provider_type="remote::pgvector", - config=PGVectorVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}", - db="${env.PGVECTOR_DB:=}", - user="${env.PGVECTOR_USER:=}", - password="${env.PGVECTOR_PASSWORD:=}", - ), - ), - Provider( - provider_id="${env.QDRANT_URL:+qdrant}", - provider_type="remote::qdrant", - config=QdrantVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}", - url="${env.QDRANT_URL:=}", - ), - ), - Provider( - provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}", - provider_type="remote::weaviate", - config=WeaviateVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}", - cluster_url="${env.WEAVIATE_CLUSTER_URL:=}", - ), - ), - ], - "files": [files_provider], - }, + provider_overrides=default_overrides, default_models=[], default_tool_groups=default_tool_groups, default_shields=default_shields, @@ -261,6 +269,55 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: default_shield_id="llama-guard", ), ), + "run-with-postgres-store.yaml": RunConfigSettings( + 
provider_overrides={ + **default_overrides, + "agents": [ + Provider( + provider_id="meta-reference", + provider_type="inline::meta-reference", + config=dict( + persistence_store=postgres_config, + responses_store=postgres_config, + ), + ) + ], + "batches": [ + Provider( + provider_id="reference", + provider_type="inline::reference", + config=dict( + kvstore=KVStoreReference( + backend="kv_postgres", + namespace="batches", + ).model_dump(exclude_none=True), + ), + ) + ], + }, + storage_backends={ + "kv_postgres": PostgresKVStoreConfig.sample_run_config(), + "sql_postgres": postgres_config, + }, + storage_stores={ + "metadata": KVStoreReference( + backend="kv_postgres", + namespace="registry", + ).model_dump(exclude_none=True), + "inference": InferenceStoreReference( + backend="sql_postgres", + table_name="inference_store", + ).model_dump(exclude_none=True), + "conversations": SqlStoreReference( + backend="sql_postgres", + table_name="openai_conversations", + ).model_dump(exclude_none=True), + "prompts": KVStoreReference( + backend="kv_postgres", + namespace="prompts", + ).model_dump(exclude_none=True), + }, + ), }, run_config_env_vars={ "LLAMA_STACK_PORT": ( diff --git a/src/llama_stack/providers/remote/inference/passthrough/__init__.py b/src/llama_stack/providers/remote/inference/passthrough/__init__.py index 69dd4c461..1cc46bff1 100644 --- a/src/llama_stack/providers/remote/inference/passthrough/__init__.py +++ b/src/llama_stack/providers/remote/inference/passthrough/__init__.py @@ -10,8 +10,8 @@ from .config import PassthroughImplConfig class PassthroughProviderDataValidator(BaseModel): - url: str - api_key: str + passthrough_url: str + passthrough_api_key: str async def get_adapter_impl(config: PassthroughImplConfig, _deps): diff --git a/src/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py index f8e8b8ce5..eca28a86a 100644 --- 
a/src/llama_stack/providers/remote/inference/passthrough/config.py +++ b/src/llama_stack/providers/remote/inference/passthrough/config.py @@ -6,7 +6,7 @@ from typing import Any -from pydantic import Field, SecretStr +from pydantic import Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.schema_utils import json_schema_type @@ -19,11 +19,6 @@ class PassthroughImplConfig(RemoteInferenceProviderConfig): description="The URL for the passthrough endpoint", ) - api_key: SecretStr | None = Field( - default=None, - description="API Key for the passthrouth endpoint", - ) - @classmethod def sample_run_config( cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs diff --git a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py index 4d4d4f41d..3c56acfbd 100644 --- a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -5,9 +5,8 @@ # the root directory of this source tree. 
from collections.abc import AsyncIterator -from typing import Any -from llama_stack_client import AsyncLlamaStackClient +from openai import AsyncOpenAI from llama_stack.apis.inference import ( Inference, @@ -20,103 +19,117 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingsResponse, ) from llama_stack.apis.models import Model -from llama_stack.core.library_client import convert_pydantic_to_json_value -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper +from llama_stack.core.request_headers import NeedsRequestProviderData from .config import PassthroughImplConfig -class PassthroughInferenceAdapter(Inference): +class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference): def __init__(self, config: PassthroughImplConfig) -> None: - ModelRegistryHelper.__init__(self) self.config = config + async def initialize(self) -> None: + pass + + async def shutdown(self) -> None: + pass + async def unregister_model(self, model_id: str) -> None: pass async def register_model(self, model: Model) -> Model: return model - def _get_client(self) -> AsyncLlamaStackClient: - passthrough_url = None - passthrough_api_key = None - provider_data = None + async def list_models(self) -> list[Model]: + """List models by calling the downstream /v1/models endpoint.""" + client = self._get_openai_client() - if self.config.url is not None: - passthrough_url = self.config.url - else: - provider_data = self.get_request_provider_data() - if provider_data is None or not provider_data.passthrough_url: - raise ValueError( - 'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": }' - ) - passthrough_url = provider_data.passthrough_url + response = await client.models.list() - if self.config.api_key is not None: - passthrough_api_key = self.config.api_key.get_secret_value() - else: - provider_data = self.get_request_provider_data() - if provider_data is None or not provider_data.passthrough_api_key: - 
raise ValueError( - 'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": }' - ) - passthrough_api_key = provider_data.passthrough_api_key + # Convert from OpenAI format to Llama Stack Model format + models = [] + for model_data in response.data: + downstream_model_id = model_data.id + custom_metadata = getattr(model_data, "custom_metadata", {}) or {} - return AsyncLlamaStackClient( - base_url=passthrough_url, - api_key=passthrough_api_key, - provider_data=provider_data, + # Prefix identifier with provider ID for local registry + local_identifier = f"{self.__provider_id__}/{downstream_model_id}" + + model = Model( + identifier=local_identifier, + provider_id=self.__provider_id__, + provider_resource_id=downstream_model_id, + model_type=custom_metadata.get("model_type", "llm"), + metadata=custom_metadata, + ) + models.append(model) + + return models + + async def should_refresh_models(self) -> bool: + """Passthrough should refresh models since they come from downstream dynamically.""" + return self.config.refresh_models + + def _get_openai_client(self) -> AsyncOpenAI: + """Get an AsyncOpenAI client configured for the downstream server.""" + base_url = self._get_passthrough_url() + api_key = self._get_passthrough_api_key() + + return AsyncOpenAI( + base_url=f"{base_url.rstrip('/')}/v1", + api_key=api_key, ) - async def openai_embeddings( - self, - params: OpenAIEmbeddingsRequestWithExtraBody, - ) -> OpenAIEmbeddingsResponse: - raise NotImplementedError() + def _get_passthrough_url(self) -> str: + """Get the passthrough URL from config or provider data.""" + if self.config.url is not None: + return self.config.url + + provider_data = self.get_request_provider_data() + if provider_data is None: + raise ValueError( + 'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": }' + ) + return provider_data.passthrough_url + + def _get_passthrough_api_key(self) -> str: + 
"""Get the passthrough API key from config or provider data.""" + if self.config.auth_credential is not None: + return self.config.auth_credential.get_secret_value() + + provider_data = self.get_request_provider_data() + if provider_data is None: + raise ValueError( + 'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": }' + ) + return provider_data.passthrough_api_key async def openai_completion( self, params: OpenAICompletionRequestWithExtraBody, ) -> OpenAICompletion: - client = self._get_client() - model_obj = await self.model_store.get_model(params.model) - - params = params.model_copy() - params.model = model_obj.provider_resource_id - + """Forward completion request to downstream using OpenAI client.""" + client = self._get_openai_client() request_params = params.model_dump(exclude_none=True) - - return await client.inference.openai_completion(**request_params) + response = await client.completions.create(**request_params) + return response # type: ignore async def openai_chat_completion( self, params: OpenAIChatCompletionRequestWithExtraBody, ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - client = self._get_client() - model_obj = await self.model_store.get_model(params.model) - - params = params.model_copy() - params.model = model_obj.provider_resource_id - + """Forward chat completion request to downstream using OpenAI client.""" + client = self._get_openai_client() request_params = params.model_dump(exclude_none=True) + response = await client.chat.completions.create(**request_params) + return response # type: ignore - return await client.inference.openai_chat_completion(**request_params) - - def cast_value_to_json_dict(self, request_params: dict[str, Any]) -> dict[str, Any]: - json_params = {} - for key, value in request_params.items(): - json_input = convert_pydantic_to_json_value(value) - if isinstance(json_input, dict): - json_input = {k: v for k, v in 
json_input.items() if v is not None} - elif isinstance(json_input, list): - json_input = [x for x in json_input if x is not None] - new_input = [] - for x in json_input: - if isinstance(x, dict): - x = {k: v for k, v in x.items() if v is not None} - new_input.append(x) - json_input = new_input - - json_params[key] = json_input - - return json_params + async def openai_embeddings( + self, + params: OpenAIEmbeddingsRequestWithExtraBody, + ) -> OpenAIEmbeddingsResponse: + """Forward embeddings request to downstream using OpenAI client.""" + client = self._get_openai_client() + request_params = params.model_dump(exclude_none=True) + response = await client.embeddings.create(**request_params) + return response # type: ignore diff --git a/src/llama_stack_ui/.dockerignore b/src/llama_stack_ui/.dockerignore new file mode 100644 index 000000000..e3d1daae6 --- /dev/null +++ b/src/llama_stack_ui/.dockerignore @@ -0,0 +1,20 @@ +.git +.gitignore +.env.local +.env.*.local +.next +node_modules +npm-debug.log +*.md +.DS_Store +.vscode +.idea +playwright-report +e2e +jest.config.ts +jest.setup.ts +eslint.config.mjs +.prettierrc +.prettierignore +.nvmrc +playwright.config.ts diff --git a/src/llama_stack_ui/Dockerfile b/src/llama_stack_ui/Dockerfile new file mode 100644 index 000000000..39e132f19 --- /dev/null +++ b/src/llama_stack_ui/Dockerfile @@ -0,0 +1,21 @@ +FROM node:22.5.1-alpine + +ENV NODE_ENV=production +ENV PORT=8322 +ENV LLAMA_STACK_UI_PORT=$PORT + +# Install dumb-init for proper signal handling +RUN apk add --no-cache dumb-init + +# Create non-root user for security +RUN addgroup --system --gid 1001 nodejs +RUN adduser --system --uid 1001 nextjs + +# Install llama-stack-ui from npm +RUN npm install -g llama-stack-ui + +USER nextjs +EXPOSE $PORT + +ENTRYPOINT ["dumb-init", "--"] +CMD ["llama-stack-ui"] diff --git a/src/llama_stack_ui/bin/cli.js b/src/llama_stack_ui/bin/cli.js new file mode 100755 index 000000000..6069d2f22 --- /dev/null +++ b/src/llama_stack_ui/bin/cli.js 
@@ -0,0 +1,34 @@ +#!/usr/bin/env node + +const { spawn } = require('child_process'); +const path = require('path'); + +const port = process.env.LLAMA_STACK_UI_PORT || 8322; +const uiDir = path.resolve(__dirname, '..'); +const serverPath = path.join(uiDir, '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', 'server.js'); +const serverDir = path.dirname(serverPath); + +console.log(`Starting Llama Stack UI on http://localhost:${port}`); + +const child = spawn(process.execPath, [serverPath], { + cwd: serverDir, + stdio: 'inherit', + env: { + ...process.env, + PORT: port, + }, +}); + +process.on('SIGINT', () => { + child.kill('SIGINT'); + process.exit(0); +}); + +process.on('SIGTERM', () => { + child.kill('SIGTERM'); + process.exit(0); +}); + +child.on('exit', (code) => { + process.exit(code); +}); diff --git a/src/llama_stack_ui/next.config.js b/src/llama_stack_ui/next.config.js new file mode 100644 index 000000000..5055b2ad8 --- /dev/null +++ b/src/llama_stack_ui/next.config.js @@ -0,0 +1,10 @@ +/** @type {import('next').NextConfig} */ +const nextConfig = { + typescript: { + // TODO: Remove this once we fix the build errors + ignoreBuildErrors: true, + }, + output: 'standalone', +}; + +module.exports = nextConfig; diff --git a/src/llama_stack_ui/next.config.ts b/src/llama_stack_ui/next.config.ts index e9ffa3083..9f4a74eca 100644 --- a/src/llama_stack_ui/next.config.ts +++ b/src/llama_stack_ui/next.config.ts @@ -1,7 +1,13 @@ import type { NextConfig } from "next"; const nextConfig: NextConfig = { - /* config options here */ + typescript: { + ignoreBuildErrors: true, + }, + output: "standalone", + images: { + unoptimized: true, + }, }; export default nextConfig; diff --git a/src/llama_stack_ui/package-lock.json b/src/llama_stack_ui/package-lock.json index 14e34b720..aa8b2ac26 100644 --- a/src/llama_stack_ui/package-lock.json +++ b/src/llama_stack_ui/package-lock.json @@ -1,12 +1,13 @@ { - "name": "ui", - "version": "0.1.0", + "name": "llama-stack-ui", + "version": 
"0.4.0-alpha.1", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "ui", - "version": "0.1.0", + "name": "llama-stack-ui", + "version": "0.4.0-alpha.1", + "license": "MIT", "dependencies": { "@radix-ui/react-collapsible": "^1.1.12", "@radix-ui/react-dialog": "^1.1.15", @@ -20,7 +21,7 @@ "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "framer-motion": "^12.23.24", - "llama-stack-client": "github:llamastack/llama-stack-client-typescript", + "llama-stack-client": "^0.3.1", "lucide-react": "^0.545.0", "next": "15.5.4", "next-auth": "^4.24.11", @@ -9684,8 +9685,9 @@ "license": "MIT" }, "node_modules/llama-stack-client": { - "version": "0.4.0-alpha.1", - "resolved": "git+ssh://git@github.com/llamastack/llama-stack-client-typescript.git#78de4862c4b7d77939ac210fa9f9bde77a2c5c5f", + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.3.1.tgz", + "integrity": "sha512-4aYoF2aAQiBSfxyZEtczeQmJn8q9T22ePDqGhR+ej5RG6a8wvl5B3v7ZoKuFkft+vcP/kbJ58GQZEPLekxekZA==", "license": "MIT", "dependencies": { "@types/node": "^18.11.18", diff --git a/src/llama_stack_ui/package.json b/src/llama_stack_ui/package.json index fb7dbee75..d86ffaf60 100644 --- a/src/llama_stack_ui/package.json +++ b/src/llama_stack_ui/package.json @@ -1,11 +1,32 @@ { - "name": "ui", - "version": "0.1.0", - "private": true, + "name": "llama-stack-ui", + "version": "0.4.0-alpha.4", + "description": "Web UI for Llama Stack", + "license": "MIT", + "author": "Llama Stack ", + "repository": { + "type": "git", + "url": "https://github.com/llamastack/llama-stack.git", + "directory": "llama_stack_ui" + }, + "bin": { + "llama-stack-ui": "bin/cli.js" + }, + "files": [ + "bin", + ".next", + "public", + "next.config.ts", + "next.config.js", + "instrumentation.ts", + "tsconfig.json", + "package.json" + ], "scripts": { "dev": "next dev --turbopack --port ${LLAMA_STACK_UI_PORT:-8322}", - "build": "next build", + "build": "next build && node 
scripts/postbuild.js", "start": "next start", + "prepublishOnly": "npm run build", "lint": "next lint", "format": "prettier --write \"./**/*.{ts,tsx}\"", "format:check": "prettier --check \"./**/*.{ts,tsx}\"", @@ -25,7 +46,7 @@ "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "framer-motion": "^12.23.24", - "llama-stack-client": "github:llamastack/llama-stack-client-typescript", + "llama-stack-client": "^0.3.1", "lucide-react": "^0.545.0", "next": "15.5.4", "next-auth": "^4.24.11", diff --git a/src/llama_stack_ui/scripts/postbuild.js b/src/llama_stack_ui/scripts/postbuild.js new file mode 100644 index 000000000..4b4dbdf5d --- /dev/null +++ b/src/llama_stack_ui/scripts/postbuild.js @@ -0,0 +1,40 @@ +const fs = require('fs'); +const path = require('path'); + +// Copy public directory to standalone +const publicSrc = path.join(__dirname, '..', 'public'); +const publicDest = path.join(__dirname, '..', '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', 'public'); + +if (fs.existsSync(publicSrc) && !fs.existsSync(publicDest)) { + console.log('Copying public directory to standalone...'); + copyDir(publicSrc, publicDest); +} + +// Copy .next/static to standalone +const staticSrc = path.join(__dirname, '..', '.next', 'static'); +const staticDest = path.join(__dirname, '..', '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', '.next', 'static'); + +if (fs.existsSync(staticSrc) && !fs.existsSync(staticDest)) { + console.log('Copying .next/static to standalone...'); + copyDir(staticSrc, staticDest); +} + +function copyDir(src, dest) { + if (!fs.existsSync(dest)) { + fs.mkdirSync(dest, { recursive: true }); + } + + const files = fs.readdirSync(src); + files.forEach((file) => { + const srcFile = path.join(src, file); + const destFile = path.join(dest, file); + + if (fs.statSync(srcFile).isDirectory()) { + copyDir(srcFile, destFile); + } else { + fs.copyFileSync(srcFile, destFile); + } + }); +} + +console.log('Postbuild complete!'); diff --git 
a/tests/integration/ci_matrix.json b/tests/integration/ci_matrix.json index 314070eab..858176dff 100644 --- a/tests/integration/ci_matrix.json +++ b/tests/integration/ci_matrix.json @@ -2,7 +2,8 @@ "default": [ {"suite": "base", "setup": "ollama"}, {"suite": "vision", "setup": "ollama-vision"}, - {"suite": "responses", "setup": "gpt"} + {"suite": "responses", "setup": "gpt"}, + {"suite": "base-vllm-subset", "setup": "vllm"} ], "schedules": { "1 0 * * 0": [ diff --git a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-fb68f5a6.json b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-fb68f5a6.json new file mode 100644 index 000000000..00e0862e8 --- /dev/null +++ b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374291, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-f70298e4ea3e4b4eb7f2cc2deb7a2b01", + "object": "model_permission", + "created": 1762374291, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/0248ff8a1be5ff5ba88046947059ffbde15a1c52adbeea456bb42abdfc931bd1.json 
b/tests/integration/inference/recordings/0248ff8a1be5ff5ba88046947059ffbde15a1c52adbeea456bb42abdfc931bd1.json new file mode 100644 index 000000000..605baf12e --- /dev/null +++ b/tests/integration/inference/recordings/0248ff8a1be5ff5ba88046947059ffbde15a1c52adbeea456bb42abdfc931bd1.json @@ -0,0 +1,3010 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_with_tools_and_streaming[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:tool_calling]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "system", + "content": "Pretend you are a weather assistant." + }, + { + "role": "user", + "content": "What's the weather like in San Francisco, CA?" + } + ], + "max_tokens": 4096, + "stream": true, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state (both required), e.g. San Francisco, CA." 
+ } + }, + "required": [ + "location" + ] + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "\n", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "Okay", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " user", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": 
"chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " asking", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " about", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": 
"chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " San", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " Francisco", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", 
+ "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " CA", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " need", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + 
"system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " use", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " get", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, 
+ "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "_weather", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " function", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " here", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } 
+ }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " function", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " requires", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + 
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " location", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " parameter", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " which", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " this", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " case", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "San", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " Francisco", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " CA", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "\".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " should", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " make", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " sure", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " include", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " both", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " city", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " state", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " as", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " specified", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " Let", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " me", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " check", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " if", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " there", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "'s", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " any", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " other", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " information", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " needed", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " but", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " user", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " just", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " wants", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " current", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " So", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " tool", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " call", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " should", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " be", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " straightforward", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "'ll", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " format", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " JSON", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " correctly", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " within", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " tool", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "_call", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " tags", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ".\n", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "\n\n", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": "chatcmpl-tool-33d90102b2fe4386808056bc3fa9ad17", + "function": { + "arguments": null, + "name": "get_weather" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", 
+ "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"location\": \"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "San", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": " Francisco", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": ",", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": " CA", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": 
null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/452805c3c85951c86e4e5dfeef078a2e184866dafee83186cd84932daae1af42.json b/tests/integration/inference/recordings/452805c3c85951c86e4e5dfeef078a2e184866dafee83186cd84932daae1af42.json new file mode 100644 index 000000000..bbb81ab62 --- /dev/null +++ b/tests/integration/inference/recordings/452805c3c85951c86e4e5dfeef078a2e184866dafee83186cd84932daae1af42.json @@ -0,0 +1,84 @@ +{ + "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestEdgeCases::test_tool_without_schema[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "user", + "content": "Call the no args tool" + } + ], + "max_tokens": 4096, + "tools": [ + { + "type": "function", + "function": { + "name": "no_args_tool", + "description": "Tool with no arguments", + "parameters": { + "type": "object", + "properties": {} + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-452805c3c859", + "choices": [ + { + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, the user wants me to call the no args tool. Let me check the available functions. There's only one tool provided, which is the no_args_tool with no arguments. Since the user didn't specify any parameters, I should just return the tool call as instructed. 
I need to make sure the JSON is correctly formatted and within the XML tags. Alright, that's all I need.\n\n\n", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [ + { + "id": "chatcmpl-tool-7a67269afe214c85924c5171612bbdbd", + "function": { + "arguments": "{}", + "name": "no_args_tool" + }, + "type": "function" + } + ], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 101, + "prompt_tokens": 136, + "total_tokens": 237, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/496035259763c1bddb1a3148c2586663d08a5bc31f697d1fc5d9bed1c71f5950.json b/tests/integration/inference/recordings/496035259763c1bddb1a3148c2586663d08a5bc31f697d1fc5d9bed1c71f5950.json new file mode 100644 index 000000000..04c59b0b7 --- /dev/null +++ b/tests/integration/inference/recordings/496035259763c1bddb1a3148c2586663d08a5bc31f697d1fc5d9bed1c71f5950.json @@ -0,0 +1,92 @@ +{ + "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestOpenAICompatibility::test_openai_chat_completion_with_tools[openai_client-txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "user", + "content": "What's the weather in Tokyo?" 
+ } + ], + "max_tokens": 4096, + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather information", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City name" + } + }, + "required": [ + "location" + ] + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-496035259763", + "choices": [ + { + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, the user is asking about the weather in Tokyo. I need to use the get_weather function for that. The function requires the location parameter, which in this case is Tokyo. I should make sure to specify \"Tokyo\" as the location. Let me check if there are any other parameters needed, but no, the function only needs the location. So the tool call should be straightforward. 
I'll format the JSON correctly inside the tool_call tags.\n\n\n", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [ + { + "id": "chatcmpl-tool-959b557fa67e4134a2391f5d35e5d5ae", + "function": { + "arguments": "{\"location\": \"Tokyo\"}", + "name": "get_weather" + }, + "type": "function" + } + ], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 117, + "prompt_tokens": 158, + "total_tokens": 275, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/524ead18daaddb6228284820adaa3fb312d2a525cc35e20c181190ddf40793e6.json b/tests/integration/inference/recordings/524ead18daaddb6228284820adaa3fb312d2a525cc35e20c181190ddf40793e6.json new file mode 100644 index 000000000..b33363e45 --- /dev/null +++ b/tests/integration/inference/recordings/524ead18daaddb6228284820adaa3fb312d2a525cc35e20c181190ddf40793e6.json @@ -0,0 +1,92 @@ +{ + "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestOpenAICompatibility::test_openai_format_preserves_complex_schemas[openai_client-txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "user", + "content": "Process this data" + } + ], + "max_tokens": 4096, + "tools": [ + { + "type": "function", + "function": { + "name": "process_data", + "description": "Process structured data", + "parameters": { + "type": "object", + "properties": { + "data": { + "$ref": "#/$defs/DataObject" + } + }, + "$defs": { + "DataObject": { + "type": 
"object", + "properties": { + "values": { + "type": "array", + "items": { + "type": "number" + } + } + } + } + } + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-524ead18daad", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, the user wants me to process the data. Let me check the available tools. There's a function called process_data that takes an object with a 'data' parameter. The data is an array of numbers. But the user hasn't provided any specific data yet. They just said \"Process this data.\" Hmm, maybe they expect me to prompt them for the data first. Wait, maybe there's a misunderstanding. Did they include the data in the conversation history? Let me look back. The user's message is \"Process this data.\" No data provided. Oh, maybe they made a mistake and forgot to include it. I need to ask them to provide the data so I can proceed. Let me confirm if there's any data mentioned. No, the current input is just the instruction. So I should ask the user to supply the data array of numbers to process.\n\n\nPlease provide the structured data you'd like me to process. 
For example, an array of numbers like `[1, 2, 3]`.", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 212, + "prompt_tokens": 180, + "total_tokens": 392, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/65eba1be095a7037c4f197f4168b310ebc8afc00aba3946ba498abe2fdbe6a63.json b/tests/integration/inference/recordings/65eba1be095a7037c4f197f4168b310ebc8afc00aba3946ba498abe2fdbe6a63.json new file mode 100644 index 000000000..a20b23fa2 --- /dev/null +++ b/tests/integration/inference/recordings/65eba1be095a7037c4f197f4168b310ebc8afc00aba3946ba498abe2fdbe6a63.json @@ -0,0 +1,2113 @@ +{ + "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestStreamingWithTools::test_streaming_tool_calls[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "user", + "content": "What time is it in UTC?" 
+ } + ], + "max_tokens": 4096, + "stream": true, + "tools": [ + { + "type": "function", + "function": { + "name": "get_time", + "description": "Get current time", + "parameters": { + "type": "object", + "properties": { + "timezone": { + "type": "string" + } + } + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "\n", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ 
+ { + "delta": { + "content": "Okay", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " user", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": 
" is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " asking", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " current", + 
"function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " time", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " UTC", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + 
"refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " Let", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " me", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " check", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": 
null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " tools", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " available", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " There", + "function_call": null, + "refusal": null, + "role": null, + 
"tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "'s", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " function", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " called", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, 
+ "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " get", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "_time", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " takes", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": 
null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " timezone", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " parameter", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " Since", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " UTC", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], 
+ "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " standard", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " time", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " zone", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + 
"model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " need", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " specify", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " So", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": 
"chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "'ll", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " call", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " get", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "_time", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + 
"service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " timezone", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " set", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, 
+ "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "UTC", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "\".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + 
"usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " That", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " should", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " retrieve", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } 
+ }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " time", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " correctly", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": ".\n", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "\n\n", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": "chatcmpl-tool-41faa6bedd074d51a6335cd2447deeab", + "function": { + "arguments": null, + "name": "get_time" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"timezone\": \"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ 
+ { + "index": 0, + "id": null, + "function": { + "arguments": "UTC", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/744052775cf90e30dac587e6b809d41a8cc37adc29c500eecee2727f428cbf5a.json b/tests/integration/inference/recordings/744052775cf90e30dac587e6b809d41a8cc37adc29c500eecee2727f428cbf5a.json new file mode 100644 index 000000000..539668be7 --- /dev/null +++ b/tests/integration/inference/recordings/744052775cf90e30dac587e6b809d41a8cc37adc29c500eecee2727f428cbf5a.json @@ -0,0 +1,98 @@ +{ + "test_id": 
"tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_with_tools[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:tool_calling]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "system", + "content": "Pretend you are a weather assistant." + }, + { + "role": "user", + "content": "What's the weather like in San Francisco, CA?" + } + ], + "max_tokens": 4096, + "stream": false, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state (both required), e.g. San Francisco, CA." + } + }, + "required": [ + "location" + ] + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-744052775cf9", + "choices": [ + { + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, the user is asking about the weather in San Francisco, CA. I need to use the get_weather function. The function requires the location parameter, which is provided as San Francisco, CA. I should make sure to format the arguments correctly as a JSON object. Let me check the required parameters again. The location is required, so I can't omit it. I'll structure the tool call with the name \"get_weather\" and the arguments including \"location\": \"San Francisco, CA\". 
That should get the current weather information for the user.\n\n\n", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [ + { + "id": "chatcmpl-tool-b59dc311dd914d3dbd6d455b122bc39c", + "function": { + "arguments": "{\"location\": \"San Francisco, CA\"}", + "name": "get_weather" + }, + "type": "function" + } + ], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 138, + "prompt_tokens": 185, + "total_tokens": 323, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/77cf218283607bfac37623e1bb4e6f33cae670df7d6995d432bca34c5dfb0e43.json b/tests/integration/inference/recordings/77cf218283607bfac37623e1bb4e6f33cae670df7d6995d432bca34c5dfb0e43.json new file mode 100644 index 000000000..05b4e2609 --- /dev/null +++ b/tests/integration/inference/recordings/77cf218283607bfac37623e1bb4e6f33cae670df7d6995d432bca34c5dfb0e43.json @@ -0,0 +1,67 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_with_tool_choice_none[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:tool_calling]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "system", + "content": "Pretend you are a weather assistant." + }, + { + "role": "user", + "content": "What's the weather like in San Francisco, CA?" 
+ } + ], + "max_tokens": 4096, + "stream": false + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-77cf21828360", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, the user is asking about the weather in San Francisco, CA. I need to check the current weather conditions. But wait, I can't access real-time data. I should mention that I can't provide the current weather forecast and ask them to check a reliable source like the National Weather Service or a weather app. Also, maybe suggest they can provide more details if they need help with something else related to the weather.\n\n\nI'm sorry, but I can't provide real-time weather information. However, you can check the current weather for San Francisco, CA using the National Weather Service (NWS) website, weather apps like Weather.com, or local meteorological services. 
Let me know if there's anything else I can assist with!", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 154, + "prompt_tokens": 33, + "total_tokens": 187, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/853f6a700b98d71d390b7d366e27133a22772fbdf11863158349c1b0625bbc72.json b/tests/integration/inference/recordings/853f6a700b98d71d390b7d366e27133a22772fbdf11863158349c1b0625bbc72.json new file mode 100644 index 000000000..2f6e4d3fc --- /dev/null +++ b/tests/integration/inference/recordings/853f6a700b98d71d390b7d366e27133a22772fbdf11863158349c1b0625bbc72.json @@ -0,0 +1,128 @@ +{ + "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestEdgeCases::test_multiple_tools_with_different_schemas[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "user", + "content": "Use one of the available tools" + } + ], + "max_tokens": 4096, + "tools": [ + { + "type": "function", + "function": { + "name": "simple", + "parameters": { + "type": "object", + "properties": { + "x": { + "type": "string" + } + } + } + } + }, + { + "type": "function", + "function": { + "name": "complex", + "parameters": { + "type": "object", + "properties": { + "data": { + "$ref": "#/$defs/Complex" + } + }, + "$defs": { + "Complex": { + "type": "object", + "properties": { + "nested": { + "type": "array", + "items": { + "type": 
"number" + } + } + } + } + } + } + } + }, + { + "type": "function", + "function": { + "name": "with_output", + "parameters": { + "type": "object", + "properties": { + "input": { + "type": "string" + } + } + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-853f6a700b98", + "choices": [ + { + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, let's see. The user wants me to use one of the available tools. The tools provided are simple, complex, and with_output. The simple function takes an argument 'x' of type string. The complex function requires a 'data' parameter that's an object with a nested array of numbers. The with_output function takes an input string.\n\nThe user's query is about using a tool, but there's no specific function name mentioned. Wait, maybe the user expects me to choose one based on the context. Since the tools are available, but the query is a general instruction, perhaps I should ask for clarification. However, the instructions say to use one of the tools if possible. Since the user hasn't specified a particular function, maybe I should check if there's any implicit function needed. But looking at the tools, none are directly related to the query. The user might need to specify which tool to use. Alternatively, maybe the answer is to call the simple function with an example input. But without more context, it's hard to tell. Wait, maybe the user expects me to choose the simplest one. Let's go with the simple function first. So the tool call would be to the simple function with x set to some value. But the user hasn't provided a specific value. Maybe I should state that the tool requires a value. But according to the instructions, if possible, use one of the tools. 
Since the user hasn't given a value, perhaps the answer is to call the simple function with an example. But the parameters for the simple function require 'x' which is a string. Maybe the user expects me to proceed without needing more info. So I'll proceed by calling the simple function with x as \"example\".\n\n\n", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [ + { + "id": "chatcmpl-tool-12e2ba0189cf484bb936cbb254a5c32a", + "function": { + "arguments": "{\"x\": \"example\"}", + "name": "simple" + }, + "type": "function" + } + ], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 378, + "prompt_tokens": 265, + "total_tokens": 643, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/99bf0054f11a9c58c13a44f9cf962a706ebe85e2a5fe637ddad558cbaafe92d8.json b/tests/integration/inference/recordings/99bf0054f11a9c58c13a44f9cf962a706ebe85e2a5fe637ddad558cbaafe92d8.json new file mode 100644 index 000000000..250e91c68 --- /dev/null +++ b/tests/integration/inference/recordings/99bf0054f11a9c58c13a44f9cf962a706ebe85e2a5fe637ddad558cbaafe92d8.json @@ -0,0 +1,103 @@ +{ + "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestMCPToolsInChatCompletion::test_mcp_tools_in_inference[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "user", + "content": "Calculate 5 + 3" + } + ], + "max_tokens": 4096, + "tools": [ + { + "type": "function", + "function": { + 
"name": "calculate", + "description": "", + "parameters": { + "properties": { + "x": { + "title": "X", + "type": "number" + }, + "y": { + "title": "Y", + "type": "number" + }, + "operation": { + "title": "Operation", + "type": "string" + } + }, + "required": [ + "x", + "y", + "operation" + ], + "title": "calculateArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-99bf0054f11a", + "choices": [ + { + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, the user wants to calculate 5 plus 3. Let me check the tools provided. The only function available is 'calculate', which requires x, y, and operation. The parameters are numbers and an operation. The user input is straightforward: 5 + 3. So I need to call the 'calculate' function with x=5, y=3, and operation='+'. That should give the correct result. I don't see any other parameters needed here. 
Just make sure the JSON is correctly formatted with the required fields.\n\n\n", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [ + { + "id": "chatcmpl-tool-6d1a92899a8246bb8fae5682dc08590c", + "function": { + "arguments": "{\"x\": 5, \"y\": 3, \"operation\": \"+\"}", + "name": "calculate" + }, + "type": "function" + } + ], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 144, + "prompt_tokens": 193, + "total_tokens": 337, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/cfb292c0f41dbc4a2c0fb39016760f6c23c248a7bbffea19ac6bcab7bf25292d.json b/tests/integration/inference/recordings/cfb292c0f41dbc4a2c0fb39016760f6c23c248a7bbffea19ac6bcab7bf25292d.json new file mode 100644 index 000000000..fdec100bb --- /dev/null +++ b/tests/integration/inference/recordings/cfb292c0f41dbc4a2c0fb39016760f6c23c248a7bbffea19ac6bcab7bf25292d.json @@ -0,0 +1,114 @@ +{ + "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestChatCompletionWithTools::test_tool_with_complex_schema[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "user", + "content": "Book a flight from SFO to JFK for John Doe" + } + ], + "max_tokens": 4096, + "tools": [ + { + "type": "function", + "function": { + "name": "book_flight", + "description": "Book a flight", + "parameters": { + "type": "object", + "properties": { + "flight": { + "$ref": "#/$defs/FlightInfo" + }, + "passenger": { + 
"$ref": "#/$defs/Passenger" + } + }, + "required": [ + "flight", + "passenger" + ], + "$defs": { + "FlightInfo": { + "type": "object", + "properties": { + "from": { + "type": "string" + }, + "to": { + "type": "string" + }, + "date": { + "type": "string", + "format": "date" + } + } + }, + "Passenger": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "age": { + "type": "integer" + } + } + } + } + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-cfb292c0f41d", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, the user wants to book a flight from SFO to JFK for John Doe. Let me check the tools available. The provided function is book_flight, which requires flight information and a passenger. The parameters needed are flight (as a FlightInfo object) and passenger (with name and age). The user mentioned SFO to JFK, so the flight details are from and to. The passenger's name is John Doe, but the age isn't provided. Wait, the function parameters require the passenger's name and age, but the user only mentioned the name. Maybe the age is missing? But the user didn't specify it, so perhaps I should note that the age is required. However, the function's required parameters are flight and passenger, so even if age is missing, the function can't be called without it. So I need to include both flight info and passenger details. The user's message only gives the name and destination, not the flight details or age. Therefore, I need to ask for the flight details and the passenger's age. But the user hasn't provided those. So I can't proceed with the function call. Wait, but maybe the user expects me to assume some default? No, the function requires all parameters. 
Since the user hasn't provided flight details or age, I can't call the function. So the correct response is to prompt the user for those details.\n\n\nThe booking requires the flight details and passenger's age. Could you provide the flight number and John Doe's age?", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 310, + "prompt_tokens": 261, + "total_tokens": 571, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/df353403c7fb59ed88c52269261b3dd9b75f681f8bb5431b4f07006d6c08aa7c.json b/tests/integration/inference/recordings/df353403c7fb59ed88c52269261b3dd9b75f681f8bb5431b4f07006d6c08aa7c.json new file mode 100644 index 000000000..eb6eb8eb2 --- /dev/null +++ b/tests/integration/inference/recordings/df353403c7fb59ed88c52269261b3dd9b75f681f8bb5431b4f07006d6c08aa7c.json @@ -0,0 +1,96 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_structured_output[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:structured_output]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant. Michael Jordan was born in 1963. His first name is \"Michael\", He played basketball for the Chicago Bulls for 15 seasons and was drafted in 1984" + }, + { + "role": "user", + "content": "Please give me information about Michael Jordan." 
+ } + ], + "max_tokens": 4096, + "response_format": { + "type": "json_schema", + "json_schema": { + "name": "AnswerFormat", + "schema": { + "properties": { + "first_name": { + "title": "First Name", + "type": "string" + }, + "last_name": { + "title": "Last Name", + "type": "string" + }, + "year_of_birth": { + "title": "Year Of Birth", + "type": "integer" + } + }, + "required": [ + "first_name", + "last_name", + "year_of_birth" + ], + "title": "AnswerFormat", + "type": "object" + } + } + }, + "stream": false + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-df353403c7fb", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "{\"first_name\": \"Michael\", \"last_name\": \"Jordan\", \"year_of_birth\": 1963}", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 28, + "prompt_tokens": 66, + "total_tokens": 94, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/e89112e7735fccc5ad9ebe6a96454953aed0ba2501cabfaa80b742c2bf371cbc.json b/tests/integration/inference/recordings/e89112e7735fccc5ad9ebe6a96454953aed0ba2501cabfaa80b742c2bf371cbc.json new file mode 100644 index 000000000..856684a55 --- /dev/null +++ b/tests/integration/inference/recordings/e89112e7735fccc5ad9ebe6a96454953aed0ba2501cabfaa80b742c2bf371cbc.json @@ -0,0 +1,92 @@ +{ + "test_id": 
"tests/integration/inference/test_tools_with_schemas.py::TestChatCompletionWithTools::test_simple_tool_call[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "user", + "content": "What's the weather in San Francisco?" + } + ], + "max_tokens": 4096, + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City name" + } + }, + "required": [ + "location" + ] + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-e89112e7735f", + "choices": [ + { + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, the user is asking for the weather in San Francisco. I need to check if there's a function available for that. Looking at the tools provided, there's a function called get_weather that requires a location parameter. The description says it gets weather for a location, and the parameter is the city name. The user provided \"San Francisco\" as the location, so I should call the get_weather function with \"San Francisco\" as the argument. I don't see any other parameters needed here, so the tool call should be straightforward. 
Just make sure the city name is correctly formatted in JSON.\n\n\n", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [ + { + "id": "chatcmpl-tool-feead29842dc40b2831c41ed397f555f", + "function": { + "arguments": "{\"location\": \"San Francisco\"}", + "name": "get_weather" + }, + "type": "function" + } + ], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 146, + "prompt_tokens": 161, + "total_tokens": 307, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/f02f1bfd75adaea87b91dedc59430b99015b5ed0e2bbf24418a31146ffcbca9b.json b/tests/integration/inference/recordings/f02f1bfd75adaea87b91dedc59430b99015b5ed0e2bbf24418a31146ffcbca9b.json new file mode 100644 index 000000000..28f7d8296 --- /dev/null +++ b/tests/integration/inference/recordings/f02f1bfd75adaea87b91dedc59430b99015b5ed0e2bbf24418a31146ffcbca9b.json @@ -0,0 +1,53 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "prompt": "I am feeling really sad today.", + "stream": false, + "extra_body": { + "guided_choice": [ + "joy", + "sadness" + ] + } + }, + "endpoint": "/v1/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "rec-f02f1bfd75ad", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "text": 
"joy", + "stop_reason": null, + "prompt_logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "text_completion", + "system_fingerprint": null, + "usage": { + "completion_tokens": 2, + "prompt_tokens": 7, + "total_tokens": 9, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-0037f2d2065a360cfcc36c35f138318cfc6508e743ff9423da4b7b1d7bfd4f3f-fb68f5a6.json b/tests/integration/inference/recordings/models-0037f2d2065a360cfcc36c35f138318cfc6508e743ff9423da4b7b1d7bfd4f3f-fb68f5a6.json new file mode 100644 index 000000000..7256ae75d --- /dev/null +++ b/tests/integration/inference/recordings/models-0037f2d2065a360cfcc36c35f138318cfc6508e743ff9423da4b7b1d7bfd4f3f-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375180, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-aeeb49e5e51c42fa94562780165bd620", + "object": "model_permission", + "created": 1762375180, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff 
--git a/tests/integration/inference/recordings/models-009fb75503cf565d6c97f70deb8235432b0020b93d55e3b33ea093664c4bbc82-fb68f5a6.json b/tests/integration/inference/recordings/models-009fb75503cf565d6c97f70deb8235432b0020b93d55e3b33ea093664c4bbc82-fb68f5a6.json new file mode 100644 index 000000000..e0f5fa68f --- /dev/null +++ b/tests/integration/inference/recordings/models-009fb75503cf565d6c97f70deb8235432b0020b93d55e3b33ea093664c4bbc82-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375115, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-feec0a894be04f738e12b596ff163b64", + "object": "model_permission", + "created": 1762375115, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-01e6ee9852f532d9b0d82dde2e7c831d698e81dea1be69433050d42643f35edc-fb68f5a6.json b/tests/integration/inference/recordings/models-01e6ee9852f532d9b0d82dde2e7c831d698e81dea1be69433050d42643f35edc-fb68f5a6.json new file mode 100644 index 000000000..0eb0a26a6 --- /dev/null +++ b/tests/integration/inference/recordings/models-01e6ee9852f532d9b0d82dde2e7c831d698e81dea1be69433050d42643f35edc-fb68f5a6.json @@ 
-0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_stop_sequence[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:stop_sequence]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374330, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-119e17052e4c4c13bd791af3138d5360", + "object": "model_permission", + "created": 1762374330, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-10370bf5307b2fc971b8e53bdcc4e9eb4d3d76fe8ecdb31231b59576a612e972-fb68f5a6.json b/tests/integration/inference/recordings/models-10370bf5307b2fc971b8e53bdcc4e9eb4d3d76fe8ecdb31231b59576a612e972-fb68f5a6.json new file mode 100644 index 000000000..dc7c97f4f --- /dev/null +++ b/tests/integration/inference/recordings/models-10370bf5307b2fc971b8e53bdcc4e9eb4d3d76fe8ecdb31231b59576a612e972-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"Qwen/Qwen3-0.6B", + "created": 1762375226, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-c6ae673fda084519b3c67947896cd3b0", + "object": "model_permission", + "created": 1762375226, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-1312e0d8579e9b0e6dcb222272de34115277db71c6c560872fa13722197f881f-fb68f5a6.json b/tests/integration/inference/recordings/models-1312e0d8579e9b0e6dcb222272de34115277db71c6c560872fa13722197f881f-fb68f5a6.json new file mode 100644 index 000000000..833003741 --- /dev/null +++ b/tests/integration/inference/recordings/models-1312e0d8579e9b0e6dcb222272de34115277db71c6c560872fa13722197f881f-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374573, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-3f422354a81e491b87f93d5b192a0e1a", + "object": "model_permission", + "created": 1762374573, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + 
"allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-134e731d073e9e07eb9782bbe292167f8ad08157a15150ce92135854d04050fc-fb68f5a6.json b/tests/integration/inference/recordings/models-134e731d073e9e07eb9782bbe292167f8ad08157a15150ce92135854d04050fc-fb68f5a6.json new file mode 100644 index 000000000..df660a0f1 --- /dev/null +++ b/tests/integration/inference/recordings/models-134e731d073e9e07eb9782bbe292167f8ad08157a15150ce92135854d04050fc-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374305, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-794e16e59ddb4216a8bedfdf485b8f24", + "object": "model_permission", + "created": 1762374305, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-14c0905df1b177d2f85b30b0285b0ffdc88d1a7b290e2155fb7a01f3c1436ca0-fb68f5a6.json b/tests/integration/inference/recordings/models-14c0905df1b177d2f85b30b0285b0ffdc88d1a7b290e2155fb7a01f3c1436ca0-fb68f5a6.json new file 
mode 100644 index 000000000..fed71ffa7 --- /dev/null +++ b/tests/integration/inference/recordings/models-14c0905df1b177d2f85b30b0285b0ffdc88d1a7b290e2155fb7a01f3c1436ca0-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=vllm/Qwen/Qwen3-0.6B-True]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374317, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-ff7d26d076eb4373a0631a80fe3ae063", + "object": "model_permission", + "created": 1762374317, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-1bc879637162ba23badeea66c4c25a638869a3e90d16ef3e84dea1a613e7192e-fb68f5a6.json b/tests/integration/inference/recordings/models-1bc879637162ba23badeea66c4c25a638869a3e90d16ef3e84dea1a613e7192e-fb68f5a6.json new file mode 100644 index 000000000..9a532d386 --- /dev/null +++ b/tests/integration/inference/recordings/models-1bc879637162ba23badeea66c4c25a638869a3e90d16ef3e84dea1a613e7192e-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + 
"body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375033, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-2a16fede981b43be9e1cbe3dbedd1e74", + "object": "model_permission", + "created": 1762375033, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-2b9bac5da1a03c0b572bc019cc0c50904d49e6193990ca245908f4535bcaab43-fb68f5a6.json b/tests/integration/inference/recordings/models-2b9bac5da1a03c0b572bc019cc0c50904d49e6193990ca245908f4535bcaab43-fb68f5a6.json new file mode 100644 index 000000000..ab3269b57 --- /dev/null +++ b/tests/integration/inference/recordings/models-2b9bac5da1a03c0b572bc019cc0c50904d49e6193990ca245908f4535bcaab43-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374297, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-4bc93704559a4e1d8492aeec7222040c", + "object": "model_permission", + "created": 1762374297, + "allow_create_engine": false, + "allow_sampling": 
true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-394c30370fe5b724c5fe1292984373b281d47b2ac0d49e8b598f13cf100b3ad8-fb68f5a6.json b/tests/integration/inference/recordings/models-394c30370fe5b724c5fe1292984373b281d47b2ac0d49e8b598f13cf100b3ad8-fb68f5a6.json new file mode 100644 index 000000000..8237cc20c --- /dev/null +++ b/tests/integration/inference/recordings/models-394c30370fe5b724c5fe1292984373b281d47b2ac0d49e8b598f13cf100b3ad8-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374532, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-e353aa079d5145c19953791ac99daeba", + "object": "model_permission", + "created": 1762374532, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-3f4208962fdb2be3e7057777fc93a149890bd1dfa8a92597e176f23658e86cd8-fb68f5a6.json 
b/tests/integration/inference/recordings/models-3f4208962fdb2be3e7057777fc93a149890bd1dfa8a92597e176f23658e86cd8-fb68f5a6.json new file mode 100644 index 000000000..14b37fb0a --- /dev/null +++ b/tests/integration/inference/recordings/models-3f4208962fdb2be3e7057777fc93a149890bd1dfa8a92597e176f23658e86cd8-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-True]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375260, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-10c27d1c9e324b18b65321b422e19af9", + "object": "model_permission", + "created": 1762375260, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-4a729b00af209ad60846d1904e5973ad081aa5f595de50f5ef1aae304cb67ef3-fb68f5a6.json b/tests/integration/inference/recordings/models-4a729b00af209ad60846d1904e5973ad081aa5f595de50f5ef1aae304cb67ef3-fb68f5a6.json new file mode 100644 index 000000000..4af25e17a --- /dev/null +++ b/tests/integration/inference/recordings/models-4a729b00af209ad60846d1904e5973ad081aa5f595de50f5ef1aae304cb67ef3-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": 
"tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375040, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-f01c211577294936958dd28046c89dba", + "object": "model_permission", + "created": 1762375040, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-4f62bcb9cdf74f4c2ed804038def162f18ad384182b0f174918607e9ed3c1515-fb68f5a6.json b/tests/integration/inference/recordings/models-4f62bcb9cdf74f4c2ed804038def162f18ad384182b0f174918607e9ed3c1515-fb68f5a6.json new file mode 100644 index 000000000..54b48967b --- /dev/null +++ b/tests/integration/inference/recordings/models-4f62bcb9cdf74f4c2ed804038def162f18ad384182b0f174918607e9ed3c1515-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_02]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"Qwen/Qwen3-0.6B", + "created": 1762375266, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-7166a6fcd331435eb2d0f0a6b23382ed", + "object": "model_permission", + "created": 1762375266, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-54b420cdb98a0149a618088f55746e26b7bf6e7c5ebf5fa07c13ec9e366521d3-fb68f5a6.json b/tests/integration/inference/recordings/models-54b420cdb98a0149a618088f55746e26b7bf6e7c5ebf5fa07c13ec9e366521d3-fb68f5a6.json new file mode 100644 index 000000000..30a33793b --- /dev/null +++ b/tests/integration/inference/recordings/models-54b420cdb98a0149a618088f55746e26b7bf6e7c5ebf5fa07c13ec9e366521d3-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374301, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-cd16b092c5a04e719ddf786f0c3e935e", + "object": "model_permission", + "created": 1762374301, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + 
"allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-613f5d11a8cda7126115f96650334fde0a0457a6b4a2605bc15eec9b50a6956c-fb68f5a6.json b/tests/integration/inference/recordings/models-613f5d11a8cda7126115f96650334fde0a0457a6b4a2605bc15eec9b50a6956c-fb68f5a6.json new file mode 100644 index 000000000..4193dce51 --- /dev/null +++ b/tests/integration/inference/recordings/models-613f5d11a8cda7126115f96650334fde0a0457a6b4a2605bc15eec9b50a6956c-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:sanity]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374295, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-9f71adbb206846bb9d0e12834e41551e", + "object": "model_permission", + "created": 1762374295, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-62a361f55d61a98ea0863e9acfb5ab5d540c5d19e791415ee476474f7f1ed90f-fb68f5a6.json b/tests/integration/inference/recordings/models-62a361f55d61a98ea0863e9acfb5ab5d540c5d19e791415ee476474f7f1ed90f-fb68f5a6.json new file mode 100644 index 
000000000..d5916fff7 --- /dev/null +++ b/tests/integration/inference/recordings/models-62a361f55d61a98ea0863e9acfb5ab5d540c5d19e791415ee476474f7f1ed90f-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_logprobs_streaming[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:log_probs]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374342, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-a8b7b38c40584a03b4b346b6c181fb93", + "object": "model_permission", + "created": 1762374342, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-668fc72f70ac72d5c112fe79d86d5c790611456b3f0102832f27e6edd420ab54-fb68f5a6.json b/tests/integration/inference/recordings/models-668fc72f70ac72d5c112fe79d86d5c790611456b3f0102832f27e6edd420ab54-fb68f5a6.json new file mode 100644 index 000000000..1542aa0cf --- /dev/null +++ b/tests/integration/inference/recordings/models-668fc72f70ac72d5c112fe79d86d5c790611456b3f0102832f27e6edd420ab54-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_01]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + 
"headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375235, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-dd48560646f141298f5cc2ef3467e54b", + "object": "model_permission", + "created": 1762375235, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-702eee4572e9b17ff0b0fdd55b10021f7077f0afcba922d6a53db0b537542518-fb68f5a6.json b/tests/integration/inference/recordings/models-702eee4572e9b17ff0b0fdd55b10021f7077f0afcba922d6a53db0b537542518-fb68f5a6.json new file mode 100644 index 000000000..fed0e7785 --- /dev/null +++ b/tests/integration/inference/recordings/models-702eee4572e9b17ff0b0fdd55b10021f7077f0afcba922d6a53db0b537542518-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374500, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-0ba0c3a54dcb4e57bc0308fd54425933", + "object": "model_permission", + 
"created": 1762374500, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-723d37a5bceab199cff076a0dcc2d4ee7596b7c800f13c64f6a6ecdbf4ed2f3a-fb68f5a6.json b/tests/integration/inference/recordings/models-723d37a5bceab199cff076a0dcc2d4ee7596b7c800f13c64f6a6ecdbf4ed2f3a-fb68f5a6.json new file mode 100644 index 000000000..f53aadba5 --- /dev/null +++ b/tests/integration/inference/recordings/models-723d37a5bceab199cff076a0dcc2d4ee7596b7c800f13c64f6a6ecdbf4ed2f3a-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=vllm/Qwen/Qwen3-0.6B-True]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374311, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-e95a9ed7439245b5995add97fb50f765", + "object": "model_permission", + "created": 1762374311, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-768c497339830cf86ddd7843f33d0ed06b3bce3ef2ae9f854364b534ba8cafb7-fb68f5a6.json 
b/tests/integration/inference/recordings/models-768c497339830cf86ddd7843f33d0ed06b3bce3ef2ae9f854364b534ba8cafb7-fb68f5a6.json new file mode 100644 index 000000000..5f5d13e94 --- /dev/null +++ b/tests/integration/inference/recordings/models-768c497339830cf86ddd7843f33d0ed06b3bce3ef2ae9f854364b534ba8cafb7-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375099, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-6b5eba46536f43df902871dd257e1676", + "object": "model_permission", + "created": 1762375099, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-7b0f2493d699e58cdfe0a9dab38f4423771c8ebced2020b1e15cbb35470c1ca2-fb68f5a6.json b/tests/integration/inference/recordings/models-7b0f2493d699e58cdfe0a9dab38f4423771c8ebced2020b1e15cbb35470c1ca2-fb68f5a6.json new file mode 100644 index 000000000..69b91db75 --- /dev/null +++ b/tests/integration/inference/recordings/models-7b0f2493d699e58cdfe0a9dab38f4423771c8ebced2020b1e15cbb35470c1ca2-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": 
"tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375207, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-bbfbcf20cac146e0ae5e45ae6a42632d", + "object": "model_permission", + "created": 1762375207, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-7ed97509ff199eabe1380caa36b9e5934e9d04a9cafcfa2d21d20f6f85679ae4-fb68f5a6.json b/tests/integration/inference/recordings/models-7ed97509ff199eabe1380caa36b9e5934e9d04a9cafcfa2d21d20f6f85679ae4-fb68f5a6.json new file mode 100644 index 000000000..d6d7b81a0 --- /dev/null +++ b/tests/integration/inference/recordings/models-7ed97509ff199eabe1380caa36b9e5934e9d04a9cafcfa2d21d20f6f85679ae4-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"Qwen/Qwen3-0.6B", + "created": 1762375273, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-4935d35e00fd4acdbe78662f42342e77", + "object": "model_permission", + "created": 1762375273, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-805e6b510b1ab33505a1af85c0d2a766cd3415512212d80f6292ca0ef5c359e1-fb68f5a6.json b/tests/integration/inference/recordings/models-805e6b510b1ab33505a1af85c0d2a766cd3415512212d80f6292ca0ef5c359e1-fb68f5a6.json new file mode 100644 index 000000000..1e7cb92bf --- /dev/null +++ b/tests/integration/inference/recordings/models-805e6b510b1ab33505a1af85c0d2a766cd3415512212d80f6292ca0ef5c359e1-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374591, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-e19031997a1e44d99c8b5ae55725a887", + "object": "model_permission", + "created": 1762374591, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + 
"allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-860b7e8309e0761e20e845be75c0a28d759384a367f6308f2a921702318a5dba-fb68f5a6.json b/tests/integration/inference/recordings/models-860b7e8309e0761e20e845be75c0a28d759384a367f6308f2a921702318a5dba-fb68f5a6.json new file mode 100644 index 000000000..2e4cb4cb0 --- /dev/null +++ b/tests/integration/inference/recordings/models-860b7e8309e0761e20e845be75c0a28d759384a367f6308f2a921702318a5dba-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_02]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375027, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-ec44b40a73b04912a837001376b59cff", + "object": "model_permission", + "created": 1762375027, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-8903569d538f9836ac6251d90c4668d3057e8e0ced847a08fd7a6faedb5710c3-fb68f5a6.json b/tests/integration/inference/recordings/models-8903569d538f9836ac6251d90c4668d3057e8e0ced847a08fd7a6faedb5710c3-fb68f5a6.json new 
file mode 100644 index 000000000..4ab79796c --- /dev/null +++ b/tests/integration/inference/recordings/models-8903569d538f9836ac6251d90c4668d3057e8e0ced847a08fd7a6faedb5710c3-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374356, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-3203232f1dbd426aba98ef1593dd3c01", + "object": "model_permission", + "created": 1762374356, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-8aa8c593dd64639678c294146fd56804393856c6e85197e6317ebd88351be21d-fb68f5a6.json b/tests/integration/inference/recordings/models-8aa8c593dd64639678c294146fd56804393856c6e85197e6317ebd88351be21d-fb68f5a6.json new file mode 100644 index 000000000..f2c124b73 --- /dev/null +++ b/tests/integration/inference/recordings/models-8aa8c593dd64639678c294146fd56804393856c6e85197e6317ebd88351be21d-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]", + "request": { + "method": "POST", + "url": 
"http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375248, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-5efe67a621074e979edaaf8fcfee9a80", + "object": "model_permission", + "created": 1762375248, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-8fc4c7b563b9bd423b74dcb4683039248f41d86c02703bd2dce845d972c9ae6f-fb68f5a6.json b/tests/integration/inference/recordings/models-8fc4c7b563b9bd423b74dcb4683039248f41d86c02703bd2dce845d972c9ae6f-fb68f5a6.json new file mode 100644 index 000000000..0b5b6e4ed --- /dev/null +++ b/tests/integration/inference/recordings/models-8fc4c7b563b9bd423b74dcb4683039248f41d86c02703bd2dce845d972c9ae6f-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375135, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": 
"modelperm-5509cf924e5e4fc89091e4593f264258", + "object": "model_permission", + "created": 1762375135, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-99ae704b53e3e3150cac5cd579e446e6545a4ab6a63048ce00ee1fbe5fbf1b4e-fb68f5a6.json b/tests/integration/inference/recordings/models-99ae704b53e3e3150cac5cd579e446e6545a4ab6a63048ce00ee1fbe5fbf1b4e-fb68f5a6.json new file mode 100644 index 000000000..f8ded3ee4 --- /dev/null +++ b/tests/integration/inference/recordings/models-99ae704b53e3e3150cac5cd579e446e6545a4ab6a63048ce00ee1fbe5fbf1b4e-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_01]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374301, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-72ed55b56df1471b9f71c48bacf8b768", + "object": "model_permission", + "created": 1762374301, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git 
a/tests/integration/inference/recordings/models-9a36a281899f0800f085473f5f0185b09a02022607b965ac08b4db2e9e7eabc9-fb68f5a6.json b/tests/integration/inference/recordings/models-9a36a281899f0800f085473f5f0185b09a02022607b965ac08b4db2e9e7eabc9-fb68f5a6.json new file mode 100644 index 000000000..32ecdf0b6 --- /dev/null +++ b/tests/integration/inference/recordings/models-9a36a281899f0800f085473f5f0185b09a02022607b965ac08b4db2e9e7eabc9-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming_suffix[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:suffix]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374295, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-abbbfbb49abc4312b2b2011d4d2ba19b", + "object": "model_permission", + "created": 1762374295, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-9beac41c66cbe8568bb72b5ba0f5608597ef8a14b42585c22b1e7c45526537c1-fb68f5a6.json b/tests/integration/inference/recordings/models-9beac41c66cbe8568bb72b5ba0f5608597ef8a14b42585c22b1e7c45526537c1-fb68f5a6.json new file mode 100644 index 000000000..454b7223f --- /dev/null +++ b/tests/integration/inference/recordings/models-9beac41c66cbe8568bb72b5ba0f5608597ef8a14b42585c22b1e7c45526537c1-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + 
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375065, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-d943fbda14264715906334300853cec7", + "object": "model_permission", + "created": 1762375065, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-a495ae010d48bb3649c822e3299e819c164c2311db231c81296ff4c72e6f81cb-fb68f5a6.json b/tests/integration/inference/recordings/models-a495ae010d48bb3649c822e3299e819c164c2311db231c81296ff4c72e6f81cb-fb68f5a6.json new file mode 100644 index 000000000..1eded64dd --- /dev/null +++ b/tests/integration/inference/recordings/models-a495ae010d48bb3649c822e3299e819c164c2311db231c81296ff4c72e6f81cb-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming_with_file[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374323, + "object": 
"model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-15a0a1106fff4fdd8ce7574373fe3cee", + "object": "model_permission", + "created": 1762374323, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-a77b3cb7370fd9f46e6ea12d72e1d9a8e7515f745289e93e5eb4a21d0e7b71b7-fb68f5a6.json b/tests/integration/inference/recordings/models-a77b3cb7370fd9f46e6ea12d72e1d9a8e7515f745289e93e5eb4a21d0e7b71b7-fb68f5a6.json new file mode 100644 index 000000000..9501f622a --- /dev/null +++ b/tests/integration/inference/recordings/models-a77b3cb7370fd9f46e6ea12d72e1d9a8e7515f745289e93e5eb4a21d0e7b71b7-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375082, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-ef1d3bc6fefc432380ef0eabdf216fd3", + "object": "model_permission", + "created": 1762375082, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + 
"is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-a82e913e058618dcb30b269a54d4e6a9cb1e0017a42efe04480874fe957194d4-fb68f5a6.json b/tests/integration/inference/recordings/models-a82e913e058618dcb30b269a54d4e6a9cb1e0017a42efe04480874fe957194d4-fb68f5a6.json new file mode 100644 index 000000000..6558ad3c4 --- /dev/null +++ b/tests/integration/inference/recordings/models-a82e913e058618dcb30b269a54d4e6a9cb1e0017a42efe04480874fe957194d4-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375165, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-b29f7386725b4f13976cd76b6dc3a278", + "object": "model_permission", + "created": 1762375165, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-bee98cb55c3b74854d0bb71b23b7e01bbb9f1580b413a26dc3afbf9da8b7d995-fb68f5a6.json b/tests/integration/inference/recordings/models-bee98cb55c3b74854d0bb71b23b7e01bbb9f1580b413a26dc3afbf9da8b7d995-fb68f5a6.json new file mode 100644 index 000000000..dc3944bb1 --- /dev/null +++ 
b/tests/integration/inference/recordings/models-bee98cb55c3b74854d0bb71b23b7e01bbb9f1580b413a26dc3afbf9da8b7d995-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_logprobs[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:log_probs]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374336, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-21db8cc1a31e41eaaa4e653435618645", + "object": "model_permission", + "created": 1762374336, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-c3d9f0302c09cecba4c3797ec2d65e358910e6194e13d1001fd3567ab2eff6aa-fb68f5a6.json b/tests/integration/inference/recordings/models-c3d9f0302c09cecba4c3797ec2d65e358910e6194e13d1001fd3567ab2eff6aa-fb68f5a6.json new file mode 100644 index 000000000..b81f6ed60 --- /dev/null +++ b/tests/integration/inference/recordings/models-c3d9f0302c09cecba4c3797ec2d65e358910e6194e13d1001fd3567ab2eff6aa-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": 
"/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374547, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-09fed2c5660e42658ab23c6d17b7840c", + "object": "model_permission", + "created": 1762374547, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-c6e251660301fe3f503b4c31dcb551087ca9118e65b97bd894954847723a9be0-fb68f5a6.json b/tests/integration/inference/recordings/models-c6e251660301fe3f503b4c31dcb551087ca9118e65b97bd894954847723a9be0-fb68f5a6.json new file mode 100644 index 000000000..03f377561 --- /dev/null +++ b/tests/integration/inference/recordings/models-c6e251660301fe3f503b4c31dcb551087ca9118e65b97bd894954847723a9be0-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_streaming[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:sanity]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374297, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-70d68a901d2445f6b7f470c600b34c78", + "object": "model_permission", + "created": 1762374297, + "allow_create_engine": false, + "allow_sampling": 
true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-cb1f7d5cd412fddb3395ef125bbcdac95c85585f23684e71abf142004b164bbc-fb68f5a6.json b/tests/integration/inference/recordings/models-cb1f7d5cd412fddb3395ef125bbcdac95c85585f23684e71abf142004b164bbc-fb68f5a6.json new file mode 100644 index 000000000..2d1759b41 --- /dev/null +++ b/tests/integration/inference/recordings/models-cb1f7d5cd412fddb3395ef125bbcdac95c85585f23684e71abf142004b164bbc-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=vllm/Qwen/Qwen3-0.6B-False]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375047, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-a8446fd6718649399402526dc6fe1477", + "object": "model_permission", + "created": 1762375047, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-cbecbec285766025f2bebca94904e63578190f33b47eb6f32cb4635a1b43e3cf-fb68f5a6.json 
b/tests/integration/inference/recordings/models-cbecbec285766025f2bebca94904e63578190f33b47eb6f32cb4635a1b43e3cf-fb68f5a6.json new file mode 100644 index 000000000..bac4b8cb4 --- /dev/null +++ b/tests/integration/inference/recordings/models-cbecbec285766025f2bebca94904e63578190f33b47eb6f32cb4635a1b43e3cf-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-True]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375254, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-f6eb51901e6443e492061deac904737c", + "object": "model_permission", + "created": 1762375254, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-d650458718dae3a10405ce1d241f0e1ceeeae8edf516cf10c611edcdf64035e3-fb68f5a6.json b/tests/integration/inference/recordings/models-d650458718dae3a10405ce1d241f0e1ceeeae8edf516cf10c611edcdf64035e3-fb68f5a6.json new file mode 100644 index 000000000..89aef16d5 --- /dev/null +++ b/tests/integration/inference/recordings/models-d650458718dae3a10405ce1d241f0e1ceeeae8edf516cf10c611edcdf64035e3-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": 
"tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375279, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-31e50ba39ad84a7daa1a24a3c77dc550", + "object": "model_permission", + "created": 1762375279, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-d8acc76e3d1b54eac9754a9d3a72c571fe3078b227a257aa15afdba946b69665-fb68f5a6.json b/tests/integration/inference/recordings/models-d8acc76e3d1b54eac9754a9d3a72c571fe3078b227a257aa15afdba946b69665-fb68f5a6.json new file mode 100644 index 000000000..a9b4bf369 --- /dev/null +++ b/tests/integration/inference/recordings/models-d8acc76e3d1b54eac9754a9d3a72c571fe3078b227a257aa15afdba946b69665-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"Qwen/Qwen3-0.6B", + "created": 1762375241, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-3ebcc379347541ea94de0f91838829e5", + "object": "model_permission", + "created": 1762375241, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-d9ff5f5ffaa7a64101936007fbe61cf2ed54f67609b54b56d92cb949234e3799-fb68f5a6.json b/tests/integration/inference/recordings/models-d9ff5f5ffaa7a64101936007fbe61cf2ed54f67609b54b56d92cb949234e3799-fb68f5a6.json new file mode 100644 index 000000000..4bd1dde93 --- /dev/null +++ b/tests/integration/inference/recordings/models-d9ff5f5ffaa7a64101936007fbe61cf2ed54f67609b54b56d92cb949234e3799-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374449, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-311f880045284a469a286b8039177d10", + "object": "model_permission", + "created": 1762374449, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": 
false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-da380037dc0fe8ae61b838baf268e616057e46f8424df0a9b52f94e48cef4a7f-fb68f5a6.json b/tests/integration/inference/recordings/models-da380037dc0fe8ae61b838baf268e616057e46f8424df0a9b52f94e48cef4a7f-fb68f5a6.json new file mode 100644 index 000000000..fa5bddb15 --- /dev/null +++ b/tests/integration/inference/recordings/models-da380037dc0fe8ae61b838baf268e616057e46f8424df0a9b52f94e48cef4a7f-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=vllm/Qwen/Qwen3-0.6B-False]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375053, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-2e52800baf7e4d3389892f33feb3f52b", + "object": "model_permission", + "created": 1762375053, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-e42ca9261e3cee9c877322a51791ab6f113478170f8a21cd0a971c53b330e999-fb68f5a6.json b/tests/integration/inference/recordings/models-e42ca9261e3cee9c877322a51791ab6f113478170f8a21cd0a971c53b330e999-fb68f5a6.json new file mode 100644 index 000000000..5cdfadeb1 --- /dev/null +++ 
b/tests/integration/inference/recordings/models-e42ca9261e3cee9c877322a51791ab6f113478170f8a21cd0a971c53b330e999-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375150, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-cfec81fed838407597a92838017f3ef5", + "object": "model_permission", + "created": 1762375150, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-e5255919e39635597ad57c723896f9d258abaad9908b22ccd03c126ce597a5db-fb68f5a6.json b/tests/integration/inference/recordings/models-e5255919e39635597ad57c723896f9d258abaad9908b22ccd03c126ce597a5db-fb68f5a6.json new file mode 100644 index 000000000..82c9665de --- /dev/null +++ b/tests/integration/inference/recordings/models-e5255919e39635597ad57c723896f9d258abaad9908b22ccd03c126ce597a5db-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + 
"endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374466, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-8811d359d9724f8cac7fd6df608f69bd", + "object": "model_permission", + "created": 1762374466, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-e6664ff0c07b13aa2af6a85925f3841eef3907bc4a55f8bc352a8c960e782ada-fb68f5a6.json b/tests/integration/inference/recordings/models-e6664ff0c07b13aa2af6a85925f3841eef3907bc4a55f8bc352a8c960e782ada-fb68f5a6.json new file mode 100644 index 000000000..f83a6dea7 --- /dev/null +++ b/tests/integration/inference/recordings/models-e6664ff0c07b13aa2af6a85925f3841eef3907bc4a55f8bc352a8c960e782ada-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374482, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-d4b4923adfdf40b7bd7698aa798e68eb", + "object": "model_permission", + "created": 1762374482, + 
"allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-edbd3344609a0fa1e97f75ede14a094a34db0dd6cb52975abae9f6e7832c6760-fb68f5a6.json b/tests/integration/inference/recordings/models-edbd3344609a0fa1e97f75ede14a094a34db0dd6cb52975abae9f6e7832c6760-fb68f5a6.json new file mode 100644 index 000000000..9084b4b59 --- /dev/null +++ b/tests/integration/inference/recordings/models-edbd3344609a0fa1e97f75ede14a094a34db0dd6cb52975abae9f6e7832c6760-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-False]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375291, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-a48cfd65bcd847d7aea01d44e8add51e", + "object": "model_permission", + "created": 1762375291, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-f6a9f5d7181cf078717443564e4de54e08845224d96b9c8150fb5cfda2068e82-fb68f5a6.json 
b/tests/integration/inference/recordings/models-f6a9f5d7181cf078717443564e4de54e08845224d96b9c8150fb5cfda2068e82-fb68f5a6.json new file mode 100644 index 000000000..9791dd1f7 --- /dev/null +++ b/tests/integration/inference/recordings/models-f6a9f5d7181cf078717443564e4de54e08845224d96b9c8150fb5cfda2068e82-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374517, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-916d53706b624fefb83e5dcc699e7a69", + "object": "model_permission", + "created": 1762374517, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-f936269fe152d95db3fb80fb10482e3cc79cfd6a28ebdf1a7a8b220ba2de641b-fb68f5a6.json b/tests/integration/inference/recordings/models-f936269fe152d95db3fb80fb10482e3cc79cfd6a28ebdf1a7a8b220ba2de641b-fb68f5a6.json new file mode 100644 index 000000000..c561e0df0 --- /dev/null +++ b/tests/integration/inference/recordings/models-f936269fe152d95db3fb80fb10482e3cc79cfd6a28ebdf1a7a8b220ba2de641b-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": 
"tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-False]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375285, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-e0640be42b814b3394545ebe92d844b3", + "object": "model_permission", + "created": 1762375285, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/suites.py b/tests/integration/suites.py index e1fb6a1c7..0cec66afe 100644 --- a/tests/integration/suites.py +++ b/tests/integration/suites.py @@ -78,7 +78,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = { "VLLM_URL": "http://localhost:8000/v1", }, defaults={ - "text_model": "vllm/meta-llama/Llama-3.2-1B-Instruct", + "text_model": "vllm/Qwen/Qwen3-0.6B", "embedding_model": "sentence-transformers/nomic-embed-text-v1.5", }, ), @@ -169,6 +169,11 @@ SUITE_DEFINITIONS: dict[str, Suite] = { roots=base_roots, default_setup="ollama", ), + "base-vllm-subset": Suite( + name="base-vllm-subset", + roots=["tests/integration/inference"], + default_setup="vllm", + ), "responses": Suite( name="responses", roots=["tests/integration/responses"], diff --git a/tests/integration/telemetry/test_completions.py b/tests/integration/telemetry/test_completions.py index 2b8835f6c..af073d8bc 100644 --- a/tests/integration/telemetry/test_completions.py +++ 
b/tests/integration/telemetry/test_completions.py @@ -12,9 +12,13 @@ before and after each test, ensuring test isolation. import json +import pytest + def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_model_id): """Verify streaming adds chunk_count and __type__=async_generator.""" + + pytest.skip("Disabled: See https://github.com/llamastack/llama-stack/issues/4089") stream = llama_stack_client.chat.completions.create( model=text_model_id, messages=[{"role": "user", "content": "Test trace openai 1"}], @@ -50,6 +54,7 @@ def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_mod def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client, text_model_id): """Comprehensive validation of telemetry data format including spans and metrics.""" + pytest.skip("Disabled: See https://github.com/llamastack/llama-stack/issues/4089") response = llama_stack_client.chat.completions.create( model=text_model_id, messages=[{"role": "user", "content": "Test trace openai with temperature 0.7"}], diff --git a/uv.lock b/uv.lock index de1c8879c..b2e562abc 100644 --- a/uv.lock +++ b/uv.lock @@ -1963,14 +1963,6 @@ dependencies = [ { name = "uvicorn" }, ] -[package.optional-dependencies] -ui = [ - { name = "llama-stack-client" }, - { name = "pandas" }, - { name = "streamlit" }, - { name = "streamlit-option-menu" }, -] - [package.dev-dependencies] benchmark = [ { name = "locust" }, @@ -2097,11 +2089,9 @@ requires-dist = [ { name = "jinja2", specifier = ">=3.1.6" }, { name = "jsonschema" }, { name = "llama-stack-client", specifier = ">=0.3.0" }, - { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.3.0" }, { name = "openai", specifier = ">=2.5.0" }, { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, { name = "opentelemetry-sdk", specifier = ">=1.30.0" }, - { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, { name = "prompt-toolkit" }, { name = "pydantic", specifier 
= ">=2.11.9" }, @@ -2111,13 +2101,10 @@ requires-dist = [ { name = "rich" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, { name = "starlette" }, - { name = "streamlit", marker = "extra == 'ui'" }, - { name = "streamlit-option-menu", marker = "extra == 'ui'" }, { name = "termcolor" }, { name = "tiktoken" }, { name = "uvicorn", specifier = ">=0.34.0" }, ] -provides-extras = ["ui"] [package.metadata.requires-dev] benchmark = [{ name = "locust", specifier = ">=2.39.1" }]