diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml index ec4d7f977..d44cba4ee 100644 --- a/.github/actions/run-and-record-tests/action.yml +++ b/.github/actions/run-and-record-tests/action.yml @@ -72,7 +72,8 @@ runs: echo "New recordings detected, committing and pushing" git add tests/integration/ - git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})" + git commit -m "Recordings update from CI (setup: ${{ inputs.setup }}, suite: ${{ inputs.suite }})" + git fetch origin ${{ github.ref_name }} git rebase origin/${{ github.ref_name }} echo "Rebased successfully" @@ -88,6 +89,8 @@ runs: run: | # Ollama logs (if ollama container exists) sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true + # vllm logs (if vllm container exists) + sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true # Note: distro container logs are now dumped in integration-tests.sh before container is removed - name: Upload logs diff --git a/.github/actions/setup-vllm/action.yml b/.github/actions/setup-vllm/action.yml index 17ebd42f2..34ced0998 100644 --- a/.github/actions/setup-vllm/action.yml +++ b/.github/actions/setup-vllm/action.yml @@ -11,13 +11,14 @@ runs: --name vllm \ -p 8000:8000 \ --privileged=true \ - quay.io/higginsd/vllm-cpu:65393ee064 \ + quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \ --host 0.0.0.0 \ --port 8000 \ --enable-auto-tool-choice \ - --tool-call-parser llama3_json \ - --model /root/.cache/Llama-3.2-1B-Instruct \ - --served-model-name meta-llama/Llama-3.2-1B-Instruct + --tool-call-parser hermes \ + --model /root/.cache/Qwen3-0.6B \ + --served-model-name Qwen/Qwen3-0.6B \ + --max-model-len 8192 # Wait for vllm to be ready echo "Waiting for vllm to be ready..." diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 41822fa79..2c797e906 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -27,7 +27,6 @@ on: schedule: # If changing the cron schedule, update the provider in the test-matrix job - cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC - - cron: '1 0 * * 0' # (test vllm) Weekly on Sunday at 1 AM UTC workflow_dispatch: inputs: test-all-client-versions: diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 5d9917bfd..448b08908 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -9976,6 +9976,70 @@ components: - metadata title: VectorStoreObject description: OpenAI Vector Store object. + VectorStoreChunkingStrategy: + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + VectorStoreChunkingStrategyAuto: + type: object + properties: + type: + type: string + const: auto + default: auto + description: >- + Strategy type, always "auto" for automatic chunking + additionalProperties: false + required: + - type + title: VectorStoreChunkingStrategyAuto + description: >- + Automatic chunking strategy for vector store files. + VectorStoreChunkingStrategyStatic: + type: object + properties: + type: + type: string + const: static + default: static + description: >- + Strategy type, always "static" for static chunking + static: + $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' + description: >- + Configuration parameters for the static chunking strategy + additionalProperties: false + required: + - type + - static + title: VectorStoreChunkingStrategyStatic + description: >- + Static chunking strategy with configurable parameters. + VectorStoreChunkingStrategyStaticConfig: + type: object + properties: + chunk_overlap_tokens: + type: integer + default: 400 + description: >- + Number of tokens to overlap between adjacent chunks + max_chunk_size_tokens: + type: integer + default: 800 + description: >- + Maximum number of tokens per chunk, must be between 100 and 4096 + additionalProperties: false + required: + - chunk_overlap_tokens + - max_chunk_size_tokens + title: VectorStoreChunkingStrategyStaticConfig + description: >- + Configuration for static chunking strategy. "OpenAICreateVectorStoreRequestWithExtraBody": type: object properties: @@ -10001,15 +10065,7 @@ components: description: >- (Optional) Expiration policy for the vector store chunking_strategy: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + $ref: '#/components/schemas/VectorStoreChunkingStrategy' description: >- (Optional) Strategy for splitting files into chunks metadata: @@ -10085,70 +10141,6 @@ components: - deleted title: VectorStoreDeleteResponse description: Response from deleting a vector store. - VectorStoreChunkingStrategy: - oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' - discriminator: - propertyName: type - mapping: - auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' - static: '#/components/schemas/VectorStoreChunkingStrategyStatic' - VectorStoreChunkingStrategyAuto: - type: object - properties: - type: - type: string - const: auto - default: auto - description: >- - Strategy type, always "auto" for automatic chunking - additionalProperties: false - required: - - type - title: VectorStoreChunkingStrategyAuto - description: >- - Automatic chunking strategy for vector store files. - VectorStoreChunkingStrategyStatic: - type: object - properties: - type: - type: string - const: static - default: static - description: >- - Strategy type, always "static" for static chunking - static: - $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' - description: >- - Configuration parameters for the static chunking strategy - additionalProperties: false - required: - - type - - static - title: VectorStoreChunkingStrategyStatic - description: >- - Static chunking strategy with configurable parameters. - VectorStoreChunkingStrategyStaticConfig: - type: object - properties: - chunk_overlap_tokens: - type: integer - default: 400 - description: >- - Number of tokens to overlap between adjacent chunks - max_chunk_size_tokens: - type: integer - default: 800 - description: >- - Maximum number of tokens per chunk, must be between 100 and 4096 - additionalProperties: false - required: - - chunk_overlap_tokens - - max_chunk_size_tokens - title: VectorStoreChunkingStrategyStaticConfig - description: >- - Configuration for static chunking strategy. "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: @@ -10606,7 +10598,9 @@ components: description: >- Object type identifier for the search results page search_query: - type: string + type: array + items: + type: string description: >- The original search query that was executed data: diff --git a/docs/docs/distributions/self_hosted_distro/starter.md b/docs/docs/distributions/self_hosted_distro/starter.md index f6786a95c..84c35f3d3 100644 --- a/docs/docs/distributions/self_hosted_distro/starter.md +++ b/docs/docs/distributions/self_hosted_distro/starter.md @@ -163,7 +163,41 @@ docker run \ --port $LLAMA_STACK_PORT ``` -### Via venv +The container will run the distribution with a SQLite store by default. This store is used for the following components: + +- Metadata store: store metadata about the models, providers, etc. +- Inference store: collect of responses from the inference provider +- Agents store: store agent configurations (sessions, turns, etc.) +- Agents Responses store: store responses from the agents + +However, you can use PostgreSQL instead by running the `starter::run-with-postgres-store.yaml` configuration: + +```bash +docker run \ + -it \ + --pull always \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -e OPENAI_API_KEY=your_openai_key \ + -e FIREWORKS_API_KEY=your_fireworks_key \ + -e TOGETHER_API_KEY=your_together_key \ + -e POSTGRES_HOST=your_postgres_host \ + -e POSTGRES_PORT=your_postgres_port \ + -e POSTGRES_DB=your_postgres_db \ + -e POSTGRES_USER=your_postgres_user \ + -e POSTGRES_PASSWORD=your_postgres_password \ + llamastack/distribution-starter \ + starter::run-with-postgres-store.yaml +``` + +Postgres environment variables: + +- `POSTGRES_HOST`: Postgres host (default: `localhost`) +- `POSTGRES_PORT`: Postgres port (default: `5432`) +- `POSTGRES_DB`: Postgres database name (default: `llamastack`) +- `POSTGRES_USER`: Postgres username (default: `llamastack`) +- `POSTGRES_PASSWORD`: Postgres password (default: `llamastack`) + +### Via Conda or venv Ensure you have configured the starter distribution using the environment variables explained above. @@ -171,8 +205,11 @@ Ensure you have configured the starter distribution using the environment variab # Install dependencies for the starter distribution uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install -# Run the server +# Run the server (with SQLite - default) uv run --with llama-stack llama stack run starter + +# Or run with PostgreSQL +uv run --with llama-stack llama stack run starter::run-with-postgres-store.yaml ``` ## Example Usage diff --git a/docs/docs/providers/inference/remote_passthrough.mdx b/docs/docs/providers/inference/remote_passthrough.mdx index 7a2931690..957cd04da 100644 --- a/docs/docs/providers/inference/remote_passthrough.mdx +++ b/docs/docs/providers/inference/remote_passthrough.mdx @@ -16,7 +16,7 @@ Passthrough inference provider for connecting to any external inference service |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | | `url` | `` | No | | The URL for the passthrough endpoint | ## Sample Configuration diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index a705f499a..cc720ad18 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -9260,6 +9260,70 @@ components: - metadata title: VectorStoreObject description: OpenAI Vector Store object. + VectorStoreChunkingStrategy: + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + VectorStoreChunkingStrategyAuto: + type: object + properties: + type: + type: string + const: auto + default: auto + description: >- + Strategy type, always "auto" for automatic chunking + additionalProperties: false + required: + - type + title: VectorStoreChunkingStrategyAuto + description: >- + Automatic chunking strategy for vector store files. + VectorStoreChunkingStrategyStatic: + type: object + properties: + type: + type: string + const: static + default: static + description: >- + Strategy type, always "static" for static chunking + static: + $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' + description: >- + Configuration parameters for the static chunking strategy + additionalProperties: false + required: + - type + - static + title: VectorStoreChunkingStrategyStatic + description: >- + Static chunking strategy with configurable parameters. + VectorStoreChunkingStrategyStaticConfig: + type: object + properties: + chunk_overlap_tokens: + type: integer + default: 400 + description: >- + Number of tokens to overlap between adjacent chunks + max_chunk_size_tokens: + type: integer + default: 800 + description: >- + Maximum number of tokens per chunk, must be between 100 and 4096 + additionalProperties: false + required: + - chunk_overlap_tokens + - max_chunk_size_tokens + title: VectorStoreChunkingStrategyStaticConfig + description: >- + Configuration for static chunking strategy. "OpenAICreateVectorStoreRequestWithExtraBody": type: object properties: @@ -9285,15 +9349,7 @@ components: description: >- (Optional) Expiration policy for the vector store chunking_strategy: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + $ref: '#/components/schemas/VectorStoreChunkingStrategy' description: >- (Optional) Strategy for splitting files into chunks metadata: @@ -9369,70 +9425,6 @@ components: - deleted title: VectorStoreDeleteResponse description: Response from deleting a vector store. - VectorStoreChunkingStrategy: - oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' - discriminator: - propertyName: type - mapping: - auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' - static: '#/components/schemas/VectorStoreChunkingStrategyStatic' - VectorStoreChunkingStrategyAuto: - type: object - properties: - type: - type: string - const: auto - default: auto - description: >- - Strategy type, always "auto" for automatic chunking - additionalProperties: false - required: - - type - title: VectorStoreChunkingStrategyAuto - description: >- - Automatic chunking strategy for vector store files. - VectorStoreChunkingStrategyStatic: - type: object - properties: - type: - type: string - const: static - default: static - description: >- - Strategy type, always "static" for static chunking - static: - $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' - description: >- - Configuration parameters for the static chunking strategy - additionalProperties: false - required: - - type - - static - title: VectorStoreChunkingStrategyStatic - description: >- - Static chunking strategy with configurable parameters. - VectorStoreChunkingStrategyStaticConfig: - type: object - properties: - chunk_overlap_tokens: - type: integer - default: 400 - description: >- - Number of tokens to overlap between adjacent chunks - max_chunk_size_tokens: - type: integer - default: 800 - description: >- - Maximum number of tokens per chunk, must be between 100 and 4096 - additionalProperties: false - required: - - chunk_overlap_tokens - - max_chunk_size_tokens - title: VectorStoreChunkingStrategyStaticConfig - description: >- - Configuration for static chunking strategy. "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: @@ -9890,7 +9882,9 @@ components: description: >- Object type identifier for the search results page search_query: - type: string + type: array + items: + type: string description: >- The original search query that was executed data: diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 5d9917bfd..448b08908 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -9976,6 +9976,70 @@ components: - metadata title: VectorStoreObject description: OpenAI Vector Store object. + VectorStoreChunkingStrategy: + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + VectorStoreChunkingStrategyAuto: + type: object + properties: + type: + type: string + const: auto + default: auto + description: >- + Strategy type, always "auto" for automatic chunking + additionalProperties: false + required: + - type + title: VectorStoreChunkingStrategyAuto + description: >- + Automatic chunking strategy for vector store files. + VectorStoreChunkingStrategyStatic: + type: object + properties: + type: + type: string + const: static + default: static + description: >- + Strategy type, always "static" for static chunking + static: + $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' + description: >- + Configuration parameters for the static chunking strategy + additionalProperties: false + required: + - type + - static + title: VectorStoreChunkingStrategyStatic + description: >- + Static chunking strategy with configurable parameters. + VectorStoreChunkingStrategyStaticConfig: + type: object + properties: + chunk_overlap_tokens: + type: integer + default: 400 + description: >- + Number of tokens to overlap between adjacent chunks + max_chunk_size_tokens: + type: integer + default: 800 + description: >- + Maximum number of tokens per chunk, must be between 100 and 4096 + additionalProperties: false + required: + - chunk_overlap_tokens + - max_chunk_size_tokens + title: VectorStoreChunkingStrategyStaticConfig + description: >- + Configuration for static chunking strategy. "OpenAICreateVectorStoreRequestWithExtraBody": type: object properties: @@ -10001,15 +10065,7 @@ components: description: >- (Optional) Expiration policy for the vector store chunking_strategy: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + $ref: '#/components/schemas/VectorStoreChunkingStrategy' description: >- (Optional) Strategy for splitting files into chunks metadata: @@ -10085,70 +10141,6 @@ components: - deleted title: VectorStoreDeleteResponse description: Response from deleting a vector store. - VectorStoreChunkingStrategy: - oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' - discriminator: - propertyName: type - mapping: - auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' - static: '#/components/schemas/VectorStoreChunkingStrategyStatic' - VectorStoreChunkingStrategyAuto: - type: object - properties: - type: - type: string - const: auto - default: auto - description: >- - Strategy type, always "auto" for automatic chunking - additionalProperties: false - required: - - type - title: VectorStoreChunkingStrategyAuto - description: >- - Automatic chunking strategy for vector store files. - VectorStoreChunkingStrategyStatic: - type: object - properties: - type: - type: string - const: static - default: static - description: >- - Strategy type, always "static" for static chunking - static: - $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' - description: >- - Configuration parameters for the static chunking strategy - additionalProperties: false - required: - - type - - static - title: VectorStoreChunkingStrategyStatic - description: >- - Static chunking strategy with configurable parameters. - VectorStoreChunkingStrategyStaticConfig: - type: object - properties: - chunk_overlap_tokens: - type: integer - default: 400 - description: >- - Number of tokens to overlap between adjacent chunks - max_chunk_size_tokens: - type: integer - default: 800 - description: >- - Maximum number of tokens per chunk, must be between 100 and 4096 - additionalProperties: false - required: - - chunk_overlap_tokens - - max_chunk_size_tokens - title: VectorStoreChunkingStrategyStaticConfig - description: >- - Configuration for static chunking strategy. "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: @@ -10606,7 +10598,9 @@ components: description: >- Object type identifier for the search results page search_query: - type: string + type: array + items: + type: string description: >- The original search query that was executed data: diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index 372e97d8c..2d088f3df 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -405,11 +405,6 @@ fi echo "=== Running Integration Tests ===" EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag" -# Additional exclusions for vllm setup -if [[ "$TEST_SETUP" == "vllm" ]]; then - EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls" -fi - PYTEST_PATTERN="not( $EXCLUDE_TESTS )" if [[ -n "$TEST_PATTERN" ]]; then PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN" diff --git a/src/llama_stack/apis/vector_io/vector_io.py b/src/llama_stack/apis/vector_io/vector_io.py index f39212733..98f43f70d 100644 --- a/src/llama_stack/apis/vector_io/vector_io.py +++ b/src/llama_stack/apis/vector_io/vector_io.py @@ -260,7 +260,7 @@ class VectorStoreSearchResponsePage(BaseModel): """ object: str = "vector_store.search_results.page" - search_query: str + search_query: list[str] data: list[VectorStoreSearchResponse] has_more: bool = False next_page: str | None = None @@ -478,7 +478,7 @@ class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"): name: str | None = None file_ids: list[str] | None = None expires_after: dict[str, Any] | None = None - chunking_strategy: dict[str, Any] | None = None + chunking_strategy: VectorStoreChunkingStrategy | None = None metadata: dict[str, Any] | None = None diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index 78b38ba95..b54217619 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -20,6 +20,8 @@ from llama_stack.apis.vector_io import ( SearchRankingOptions, VectorIO, VectorStoreChunkingStrategy, + VectorStoreChunkingStrategyStatic, + VectorStoreChunkingStrategyStaticConfig, VectorStoreDeleteResponse, VectorStoreFileBatchObject, VectorStoreFileContentsResponse, @@ -167,6 +169,13 @@ class VectorIORouter(VectorIO): if embedding_dimension is not None: params.model_extra["embedding_dimension"] = embedding_dimension + # Set chunking strategy explicitly if not provided + if params.chunking_strategy is None or params.chunking_strategy.type == "auto": + # actualize the chunking strategy to static + params.chunking_strategy = VectorStoreChunkingStrategyStatic( + static=VectorStoreChunkingStrategyStaticConfig() + ) + return await provider.openai_create_vector_store(params) async def openai_list_vector_stores( @@ -283,6 +292,8 @@ class VectorIORouter(VectorIO): chunking_strategy: VectorStoreChunkingStrategy | None = None, ) -> VectorStoreFileObject: logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}") + if chunking_strategy is None or chunking_strategy.type == "auto": + chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig()) provider = await self.routing_table.get_provider_impl(vector_store_id) return await provider.openai_attach_file_to_vector_store( vector_store_id=vector_store_id, diff --git a/src/llama_stack/core/utils/config_resolution.py b/src/llama_stack/core/utils/config_resolution.py index fcf057db6..2a85837b6 100644 --- a/src/llama_stack/core/utils/config_resolution.py +++ b/src/llama_stack/core/utils/config_resolution.py @@ -52,7 +52,17 @@ def resolve_config_or_distro( logger.debug(f"Using distribution: {distro_config}") return distro_config - # Strategy 3: Try as built distribution name + # Strategy 3: Try as distro config path (if no .yaml extension and contains a slash) + # eg: starter::run-with-postgres-store.yaml + # Use :: to avoid slash and confusion with a filesystem path + if "::" in config_or_distro: + distro_name, config_name = config_or_distro.split("::") + distro_config = _get_distro_config_path(distro_name, config_name) + if distro_config.exists(): + logger.info(f"Using distribution: {distro_config}") + return distro_config + + # Strategy 4: Try as built distribution name distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml" if distrib_config.exists(): logger.debug(f"Using built distribution: {distrib_config}") @@ -63,13 +73,15 @@ def resolve_config_or_distro( logger.debug(f"Using built distribution: {distrib_config}") return distrib_config - # Strategy 4: Failed - provide helpful error + # Strategy 5: Failed - provide helpful error raise ValueError(_format_resolution_error(config_or_distro, mode)) -def _get_distro_config_path(distro_name: str, mode: Mode) -> Path: +def _get_distro_config_path(distro_name: str, mode: str) -> Path: """Get the config file path for a distro.""" - return DISTRO_DIR / distro_name / f"{mode}.yaml" + if not mode.endswith(".yaml"): + mode = f"{mode}.yaml" + return DISTRO_DIR / distro_name / mode def _format_resolution_error(config_or_distro: str, mode: Mode) -> str: diff --git a/src/llama_stack/core/utils/exec.py b/src/llama_stack/core/utils/exec.py index 12fb82d01..98964db2c 100644 --- a/src/llama_stack/core/utils/exec.py +++ b/src/llama_stack/core/utils/exec.py @@ -84,6 +84,15 @@ def run_command(command: list[str]) -> int: text=True, check=False, ) + + # Print stdout and stderr if command failed + if result.returncode != 0: + log.error(f"Command {' '.join(command)} failed with returncode {result.returncode}") + if result.stdout: + log.error(f"STDOUT: {result.stdout}") + if result.stderr: + log.error(f"STDERR: {result.stderr}") + return result.returncode except subprocess.SubprocessError as e: log.error(f"Subprocess error: {e}") diff --git a/src/llama_stack/distributions/ci-tests/build.yaml b/src/llama_stack/distributions/ci-tests/build.yaml index c01e415a9..f29ac7712 100644 --- a/src/llama_stack/distributions/ci-tests/build.yaml +++ b/src/llama_stack/distributions/ci-tests/build.yaml @@ -56,4 +56,5 @@ image_type: venv additional_pip_packages: - aiosqlite - asyncpg +- psycopg2-binary - sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/ci-tests/ci_tests.py b/src/llama_stack/distributions/ci-tests/ci_tests.py index ab102f5f3..c06b1b98d 100644 --- a/src/llama_stack/distributions/ci-tests/ci_tests.py +++ b/src/llama_stack/distributions/ci-tests/ci_tests.py @@ -13,5 +13,6 @@ from ..starter.starter import get_distribution_template as get_starter_distribut def get_distribution_template() -> DistributionTemplate: template = get_starter_distribution_template(name="ci-tests") template.description = "CI tests for Llama Stack" + template.run_configs.pop("run-with-postgres-store.yaml", None) return template diff --git a/src/llama_stack/distributions/postgres-demo/__init__.py b/src/llama_stack/distributions/postgres-demo/__init__.py deleted file mode 100644 index 81473cb73..000000000 --- a/src/llama_stack/distributions/postgres-demo/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .postgres_demo import get_distribution_template # noqa: F401 diff --git a/src/llama_stack/distributions/postgres-demo/build.yaml b/src/llama_stack/distributions/postgres-demo/build.yaml deleted file mode 100644 index 063dc3999..000000000 --- a/src/llama_stack/distributions/postgres-demo/build.yaml +++ /dev/null @@ -1,23 +0,0 @@ -version: 2 -distribution_spec: - description: Quick start template for running Llama Stack with several popular providers - providers: - inference: - - provider_type: remote::vllm - - provider_type: inline::sentence-transformers - vector_io: - - provider_type: remote::chromadb - safety: - - provider_type: inline::llama-guard - agents: - - provider_type: inline::meta-reference - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol -image_type: venv -additional_pip_packages: -- asyncpg -- psycopg2-binary -- sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/postgres-demo/postgres_demo.py b/src/llama_stack/distributions/postgres-demo/postgres_demo.py deleted file mode 100644 index 876370ef3..000000000 --- a/src/llama_stack/distributions/postgres-demo/postgres_demo.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from llama_stack.apis.models import ModelType -from llama_stack.core.datatypes import ( - BuildProvider, - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.distributions.template import ( - DistributionTemplate, - RunConfigSettings, -) -from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig -from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig -from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig -from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig -from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig - - -def get_distribution_template() -> DistributionTemplate: - inference_providers = [ - Provider( - provider_id="vllm-inference", - provider_type="remote::vllm", - config=VLLMInferenceAdapterConfig.sample_run_config( - url="${env.VLLM_URL:=http://localhost:8000/v1}", - ), - ), - ] - providers = { - "inference": [ - BuildProvider(provider_type="remote::vllm"), - BuildProvider(provider_type="inline::sentence-transformers"), - ], - "vector_io": [BuildProvider(provider_type="remote::chromadb")], - "safety": [BuildProvider(provider_type="inline::llama-guard")], - "agents": [BuildProvider(provider_type="inline::meta-reference")], - "tool_runtime": [ - BuildProvider(provider_type="remote::brave-search"), - BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), - BuildProvider(provider_type="remote::model-context-protocol"), - ], - } - name = "postgres-demo" - - vector_io_providers = [ - Provider( - provider_id="${env.ENABLE_CHROMADB:+chromadb}", - provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}", - url="${env.CHROMADB_URL:=}", - ), - ), - ] - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - - default_models = [ - ModelInput( - model_id="${env.INFERENCE_MODEL}", - provider_id="vllm-inference", - ) - ] - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - embedding_model = ModelInput( - model_id="nomic-embed-text-v1.5", - provider_id=embedding_provider.provider_id, - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 768, - }, - ) - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Quick start template for running Llama Stack with several popular providers", - container_image=None, - template_path=None, - providers=providers, - available_models_by_provider={}, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": inference_providers + [embedding_provider], - "vector_io": vector_io_providers, - }, - default_models=default_models + [embedding_model], - default_tool_groups=default_tool_groups, - default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], - storage_backends={ - "kv_default": PostgresKVStoreConfig.sample_run_config( - table_name="llamastack_kvstore", - ), - "sql_default": PostgresSqlStoreConfig.sample_run_config(), - }, - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - }, - ) diff --git a/src/llama_stack/distributions/starter-gpu/build.yaml b/src/llama_stack/distributions/starter-gpu/build.yaml index b2e2a0c85..10cbb1389 100644 --- a/src/llama_stack/distributions/starter-gpu/build.yaml +++ b/src/llama_stack/distributions/starter-gpu/build.yaml @@ -57,4 +57,5 @@ image_type: venv additional_pip_packages: - aiosqlite - asyncpg +- psycopg2-binary - sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml new file mode 100644 index 000000000..6dbbc8716 --- /dev/null +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -0,0 +1,281 @@ +version: 2 +image_name: starter-gpu +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ${env.OLLAMA_URL:+ollama} + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + - provider_id: ${env.VLLM_URL:+vllm} + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.TGI_URL:+tgi} + provider_type: remote::tgi + config: + url: ${env.TGI_URL:=} + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: together + provider_type: remote::together + config: + url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: bedrock + provider_type: remote::bedrock + - provider_id: ${env.NVIDIA_API_KEY:+nvidia} + provider_type: remote::nvidia + config: + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:=} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} + - provider_id: anthropic + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=} + - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} + - provider_id: groq + provider_type: remote::groq + config: + url: https://api.groq.com + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova + provider_type: remote::sambanova + config: + url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY:=} + - provider_id: ${env.AZURE_API_KEY:+azure} + provider_type: remote::azure + config: + api_key: ${env.AZURE_API_KEY:=} + api_base: ${env.AZURE_API_BASE:=} + api_version: ${env.AZURE_API_VERSION:=} + api_type: ${env.AZURE_API_TYPE:=} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default + - provider_id: ${env.MILVUS_URL:+milvus} + provider_type: inline::milvus + config: + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db + persistence: + namespace: vector_io::milvus + backend: kv_default + - provider_id: ${env.CHROMADB_URL:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + - provider_id: ${env.PGVECTOR_DB:+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + - provider_id: code-scanner + provider_type: inline::code-scanner + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + responses_store: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + post_training: + - provider_id: huggingface-gpu + provider_type: inline::huggingface-gpu + config: + checkpoint_format: huggingface + distributed_backend: null + device: cpu + dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_postgres +storage: + backends: + kv_postgres: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_postgres: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + namespace: registry + backend: kv_postgres + inference: + table_name: inference_store + backend: sql_postgres + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_postgres + prompts: + namespace: prompts + backend: kv_postgres +registered_resources: + models: [] + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: [] +server: + port: 8321 +telemetry: + enabled: true diff --git a/src/llama_stack/distributions/starter/build.yaml b/src/llama_stack/distributions/starter/build.yaml index baa80ef3e..acd51f773 100644 --- a/src/llama_stack/distributions/starter/build.yaml +++ b/src/llama_stack/distributions/starter/build.yaml @@ -57,4 +57,5 @@ image_type: venv additional_pip_packages: - aiosqlite - asyncpg +- psycopg2-binary - sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml new file mode 100644 index 000000000..530084bd9 --- /dev/null +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -0,0 +1,278 @@ +version: 2 +image_name: starter +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ${env.OLLAMA_URL:+ollama} + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + - provider_id: ${env.VLLM_URL:+vllm} + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.TGI_URL:+tgi} + provider_type: remote::tgi + config: + url: ${env.TGI_URL:=} + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: together + provider_type: remote::together + config: + url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: bedrock + provider_type: remote::bedrock + - provider_id: ${env.NVIDIA_API_KEY:+nvidia} + provider_type: remote::nvidia + config: + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:=} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} + - provider_id: anthropic + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=} + - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} + - provider_id: groq + provider_type: remote::groq + config: + url: https://api.groq.com + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova + provider_type: remote::sambanova + config: + url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY:=} + - provider_id: ${env.AZURE_API_KEY:+azure} + provider_type: remote::azure + config: + api_key: ${env.AZURE_API_KEY:=} + api_base: ${env.AZURE_API_BASE:=} + api_version: ${env.AZURE_API_VERSION:=} + api_type: ${env.AZURE_API_TYPE:=} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default + - provider_id: ${env.MILVUS_URL:+milvus} + provider_type: inline::milvus + config: + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db + persistence: + namespace: vector_io::milvus + backend: kv_default + - provider_id: ${env.CHROMADB_URL:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + - provider_id: ${env.PGVECTOR_DB:+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + - provider_id: code-scanner + provider_type: inline::code-scanner + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + responses_store: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + post_training: + - provider_id: torchtune-cpu + provider_type: inline::torchtune-cpu + config: + checkpoint_format: meta + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_postgres +storage: + backends: + kv_postgres: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_postgres: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + namespace: registry + backend: kv_postgres + inference: + table_name: inference_store + backend: sql_postgres + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_postgres + prompts: + namespace: prompts + backend: kv_postgres +registered_resources: + models: [] + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: [] +server: + port: 8321 +telemetry: + enabled: true diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py index 49b7a2463..88cd3a4fe 100644 --- a/src/llama_stack/distributions/starter/starter.py +++ b/src/llama_stack/distributions/starter/starter.py @@ -17,6 +17,11 @@ from llama_stack.core.datatypes import ( ToolGroupInput, VectorStoresConfig, ) +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + SqlStoreReference, +) from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings from llama_stack.providers.datatypes import RemoteProviderSpec @@ -36,6 +41,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import ( ) from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig +from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig @@ -181,6 +187,62 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: provider_shield_id="${env.CODE_SCANNER_MODEL:=}", ), ] + postgres_config = PostgresSqlStoreConfig.sample_run_config() + default_overrides = { + "inference": remote_inference_providers + [embedding_provider], + "vector_io": [ + Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ), + Provider( + provider_id="sqlite-vec", + provider_type="inline::sqlite-vec", + config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ), + Provider( + provider_id="${env.MILVUS_URL:+milvus}", + provider_type="inline::milvus", + config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ), + Provider( + provider_id="${env.CHROMADB_URL:+chromadb}", + provider_type="remote::chromadb", + config=ChromaVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}/", + url="${env.CHROMADB_URL:=}", + ), + ), + Provider( + provider_id="${env.PGVECTOR_DB:+pgvector}", + provider_type="remote::pgvector", + config=PGVectorVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}", + db="${env.PGVECTOR_DB:=}", + user="${env.PGVECTOR_USER:=}", + password="${env.PGVECTOR_PASSWORD:=}", + ), + ), + Provider( + provider_id="${env.QDRANT_URL:+qdrant}", + provider_type="remote::qdrant", + config=QdrantVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}", + url="${env.QDRANT_URL:=}", + ), + ), + Provider( + provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}", + provider_type="remote::weaviate", + config=WeaviateVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}", + cluster_url="${env.WEAVIATE_CLUSTER_URL:=}", + ), + ), + ], + "files": [files_provider], + } return DistributionTemplate( name=name, @@ -189,64 +251,10 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - additional_pip_packages=PostgresSqlStoreConfig.pip_packages(), + additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())), run_configs={ "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": remote_inference_providers + [embedding_provider], - "vector_io": [ - Provider( - provider_id="faiss", - provider_type="inline::faiss", - config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ), - Provider( - provider_id="sqlite-vec", - provider_type="inline::sqlite-vec", - config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ), - Provider( - provider_id="${env.MILVUS_URL:+milvus}", - provider_type="inline::milvus", - config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ), - Provider( - provider_id="${env.CHROMADB_URL:+chromadb}", - provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}/", - url="${env.CHROMADB_URL:=}", - ), - ), - Provider( - provider_id="${env.PGVECTOR_DB:+pgvector}", - provider_type="remote::pgvector", - config=PGVectorVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}", - db="${env.PGVECTOR_DB:=}", - user="${env.PGVECTOR_USER:=}", - password="${env.PGVECTOR_PASSWORD:=}", - ), - ), - Provider( - provider_id="${env.QDRANT_URL:+qdrant}", - provider_type="remote::qdrant", - config=QdrantVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}", - url="${env.QDRANT_URL:=}", - ), - ), - Provider( - provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}", - provider_type="remote::weaviate", - config=WeaviateVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}", - cluster_url="${env.WEAVIATE_CLUSTER_URL:=}", - ), - ), - ], - "files": [files_provider], - }, + provider_overrides=default_overrides, default_models=[], default_tool_groups=default_tool_groups, default_shields=default_shields, @@ -261,6 +269,55 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: default_shield_id="llama-guard", ), ), + "run-with-postgres-store.yaml": RunConfigSettings( + provider_overrides={ + **default_overrides, + "agents": [ + Provider( + provider_id="meta-reference", + provider_type="inline::meta-reference", + config=dict( + persistence_store=postgres_config, + responses_store=postgres_config, + ), + ) + ], + "batches": [ + Provider( + provider_id="reference", + provider_type="inline::reference", + config=dict( + kvstore=KVStoreReference( + backend="kv_postgres", + namespace="batches", + ).model_dump(exclude_none=True), + ), + ) + ], + }, + storage_backends={ + "kv_postgres": PostgresKVStoreConfig.sample_run_config(), + "sql_postgres": postgres_config, + }, + storage_stores={ + "metadata": KVStoreReference( + backend="kv_postgres", + namespace="registry", + ).model_dump(exclude_none=True), + "inference": InferenceStoreReference( + backend="sql_postgres", + table_name="inference_store", + ).model_dump(exclude_none=True), + "conversations": SqlStoreReference( + backend="sql_postgres", + table_name="openai_conversations", + ).model_dump(exclude_none=True), + "prompts": KVStoreReference( + backend="kv_postgres", + namespace="prompts", + ).model_dump(exclude_none=True), + }, + ), }, run_config_env_vars={ "LLAMA_STACK_PORT": ( diff --git a/src/llama_stack/providers/remote/inference/passthrough/__init__.py b/src/llama_stack/providers/remote/inference/passthrough/__init__.py index 69dd4c461..1cc46bff1 100644 --- a/src/llama_stack/providers/remote/inference/passthrough/__init__.py +++ b/src/llama_stack/providers/remote/inference/passthrough/__init__.py @@ -10,8 +10,8 @@ from .config import PassthroughImplConfig class PassthroughProviderDataValidator(BaseModel): - url: str - api_key: str + passthrough_url: str + passthrough_api_key: str async def get_adapter_impl(config: PassthroughImplConfig, _deps): diff --git a/src/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py index f8e8b8ce5..eca28a86a 100644 --- a/src/llama_stack/providers/remote/inference/passthrough/config.py +++ b/src/llama_stack/providers/remote/inference/passthrough/config.py @@ -6,7 +6,7 @@ from typing import Any -from pydantic import Field, SecretStr +from pydantic import Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.schema_utils import json_schema_type @@ -19,11 +19,6 @@ class PassthroughImplConfig(RemoteInferenceProviderConfig): description="The URL for the passthrough endpoint", ) - api_key: SecretStr | None = Field( - default=None, - description="API Key for the passthrouth endpoint", - ) - @classmethod def sample_run_config( cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs diff --git a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py index 4d4d4f41d..3c56acfbd 100644 --- a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -5,9 +5,8 @@ # the root directory of this source tree. from collections.abc import AsyncIterator -from typing import Any -from llama_stack_client import AsyncLlamaStackClient +from openai import AsyncOpenAI from llama_stack.apis.inference import ( Inference, @@ -20,103 +19,117 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingsResponse, ) from llama_stack.apis.models import Model -from llama_stack.core.library_client import convert_pydantic_to_json_value -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper +from llama_stack.core.request_headers import NeedsRequestProviderData from .config import PassthroughImplConfig -class PassthroughInferenceAdapter(Inference): +class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference): def __init__(self, config: PassthroughImplConfig) -> None: - ModelRegistryHelper.__init__(self) self.config = config + async def initialize(self) -> None: + pass + + async def shutdown(self) -> None: + pass + async def unregister_model(self, model_id: str) -> None: pass async def register_model(self, model: Model) -> Model: return model - def _get_client(self) -> AsyncLlamaStackClient: - passthrough_url = None - passthrough_api_key = None - provider_data = None + async def list_models(self) -> list[Model]: + """List models by calling the downstream /v1/models endpoint.""" + client = self._get_openai_client() - if self.config.url is not None: - passthrough_url = self.config.url - else: - provider_data = self.get_request_provider_data() - if provider_data is None or not provider_data.passthrough_url: - raise ValueError( - 'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": }' - ) - passthrough_url = provider_data.passthrough_url + response = await client.models.list() - if self.config.api_key is not None: - passthrough_api_key = self.config.api_key.get_secret_value() - else: - provider_data = self.get_request_provider_data() - if provider_data is None or not provider_data.passthrough_api_key: - raise ValueError( - 'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": }' - ) - passthrough_api_key = provider_data.passthrough_api_key + # Convert from OpenAI format to Llama Stack Model format + models = [] + for model_data in response.data: + downstream_model_id = model_data.id + custom_metadata = getattr(model_data, "custom_metadata", {}) or {} - return AsyncLlamaStackClient( - base_url=passthrough_url, - api_key=passthrough_api_key, - provider_data=provider_data, + # Prefix identifier with provider ID for local registry + local_identifier = f"{self.__provider_id__}/{downstream_model_id}" + + model = Model( + identifier=local_identifier, + provider_id=self.__provider_id__, + provider_resource_id=downstream_model_id, + model_type=custom_metadata.get("model_type", "llm"), + metadata=custom_metadata, + ) + models.append(model) + + return models + + async def should_refresh_models(self) -> bool: + """Passthrough should refresh models since they come from downstream dynamically.""" + return self.config.refresh_models + + def _get_openai_client(self) -> AsyncOpenAI: + """Get an AsyncOpenAI client configured for the downstream server.""" + base_url = self._get_passthrough_url() + api_key = self._get_passthrough_api_key() + + return AsyncOpenAI( + base_url=f"{base_url.rstrip('/')}/v1", + api_key=api_key, ) - async def openai_embeddings( - self, - params: OpenAIEmbeddingsRequestWithExtraBody, - ) -> OpenAIEmbeddingsResponse: - raise NotImplementedError() + def _get_passthrough_url(self) -> str: + """Get the passthrough URL from config or provider data.""" + if self.config.url is not None: + return self.config.url + + provider_data = self.get_request_provider_data() + if provider_data is None: + raise ValueError( + 'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": }' + ) + return provider_data.passthrough_url + + def _get_passthrough_api_key(self) -> str: + """Get the passthrough API key from config or provider data.""" + if self.config.auth_credential is not None: + return self.config.auth_credential.get_secret_value() + + provider_data = self.get_request_provider_data() + if provider_data is None: + raise ValueError( + 'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": }' + ) + return provider_data.passthrough_api_key async def openai_completion( self, params: OpenAICompletionRequestWithExtraBody, ) -> OpenAICompletion: - client = self._get_client() - model_obj = await self.model_store.get_model(params.model) - - params = params.model_copy() - params.model = model_obj.provider_resource_id - + """Forward completion request to downstream using OpenAI client.""" + client = self._get_openai_client() request_params = params.model_dump(exclude_none=True) - - return await client.inference.openai_completion(**request_params) + response = await client.completions.create(**request_params) + return response # type: ignore async def openai_chat_completion( self, params: OpenAIChatCompletionRequestWithExtraBody, ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - client = self._get_client() - model_obj = await self.model_store.get_model(params.model) - - params = params.model_copy() - params.model = model_obj.provider_resource_id - + """Forward chat completion request to downstream using OpenAI client.""" + client = self._get_openai_client() request_params = params.model_dump(exclude_none=True) + response = await client.chat.completions.create(**request_params) + return response # type: ignore - return await client.inference.openai_chat_completion(**request_params) - - def cast_value_to_json_dict(self, request_params: dict[str, Any]) -> dict[str, Any]: - json_params = {} - for key, value in request_params.items(): - json_input = convert_pydantic_to_json_value(value) - if isinstance(json_input, dict): - json_input = {k: v for k, v in json_input.items() if v is not None} - elif isinstance(json_input, list): - json_input = [x for x in json_input if x is not None] - new_input = [] - for x in json_input: - if isinstance(x, dict): - x = {k: v for k, v in x.items() if v is not None} - new_input.append(x) - json_input = new_input - - json_params[key] = json_input - - return json_params + async def openai_embeddings( + self, + params: OpenAIEmbeddingsRequestWithExtraBody, + ) -> OpenAIEmbeddingsResponse: + """Forward embeddings request to downstream using OpenAI client.""" + client = self._get_openai_client() + request_params = params.model_dump(exclude_none=True) + response = await client.embeddings.create(**request_params) + return response # type: ignore diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 41d4cb2d7..d047d9d12 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -26,6 +26,7 @@ from llama_stack.apis.vector_io import ( VectorStoreChunkingStrategy, VectorStoreChunkingStrategyAuto, VectorStoreChunkingStrategyStatic, + VectorStoreChunkingStrategyStaticConfig, VectorStoreContent, VectorStoreDeleteResponse, VectorStoreFileBatchObject, @@ -414,6 +415,10 @@ class OpenAIVectorStoreMixin(ABC): in_progress=0, total=0, ) + if not params.chunking_strategy or params.chunking_strategy.type == "auto": + chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig()) + else: + chunking_strategy = params.chunking_strategy store_info: dict[str, Any] = { "id": vector_store_id, "object": "vector_store", @@ -426,7 +431,7 @@ class OpenAIVectorStoreMixin(ABC): "expires_at": None, "last_active_at": created_at, "file_ids": [], - "chunking_strategy": params.chunking_strategy, + "chunking_strategy": chunking_strategy.model_dump(), } # Add provider information to metadata if provided @@ -637,7 +642,7 @@ class OpenAIVectorStoreMixin(ABC): break return VectorStoreSearchResponsePage( - search_query=search_query, + search_query=query if isinstance(query, list) else [query], data=data, has_more=False, # For simplicity, we don't implement pagination here next_page=None, @@ -647,7 +652,7 @@ class OpenAIVectorStoreMixin(ABC): logger.error(f"Error searching vector store {vector_store_id}: {e}") # Return empty results on error return VectorStoreSearchResponsePage( - search_query=search_query, + search_query=query if isinstance(query, list) else [query], data=[], has_more=False, next_page=None, @@ -886,8 +891,8 @@ class OpenAIVectorStoreMixin(ABC): # Determine pagination info has_more = len(file_objects) > limit - first_id = file_objects[0].id if file_objects else None - last_id = file_objects[-1].id if file_objects else None + first_id = limited_files[0].id if file_objects else None + last_id = limited_files[-1].id if file_objects else None return VectorStoreListFilesResponse( data=limited_files, diff --git a/tests/integration/ci_matrix.json b/tests/integration/ci_matrix.json index 314070eab..858176dff 100644 --- a/tests/integration/ci_matrix.json +++ b/tests/integration/ci_matrix.json @@ -2,7 +2,8 @@ "default": [ {"suite": "base", "setup": "ollama"}, {"suite": "vision", "setup": "ollama-vision"}, - {"suite": "responses", "setup": "gpt"} + {"suite": "responses", "setup": "gpt"}, + {"suite": "base-vllm-subset", "setup": "vllm"} ], "schedules": { "1 0 * * 0": [ diff --git a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-fb68f5a6.json b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-fb68f5a6.json new file mode 100644 index 000000000..00e0862e8 --- /dev/null +++ b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374291, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-f70298e4ea3e4b4eb7f2cc2deb7a2b01", + "object": "model_permission", + "created": 1762374291, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/0248ff8a1be5ff5ba88046947059ffbde15a1c52adbeea456bb42abdfc931bd1.json b/tests/integration/inference/recordings/0248ff8a1be5ff5ba88046947059ffbde15a1c52adbeea456bb42abdfc931bd1.json new file mode 100644 index 000000000..605baf12e --- /dev/null +++ b/tests/integration/inference/recordings/0248ff8a1be5ff5ba88046947059ffbde15a1c52adbeea456bb42abdfc931bd1.json @@ -0,0 +1,3010 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_with_tools_and_streaming[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:tool_calling]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "system", + "content": "Pretend you are a weather assistant." + }, + { + "role": "user", + "content": "What's the weather like in San Francisco, CA?" + } + ], + "max_tokens": 4096, + "stream": true, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state (both required), e.g. San Francisco, CA." + } + }, + "required": [ + "location" + ] + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "\n", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "Okay", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " user", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " asking", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " about", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " San", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " Francisco", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " CA", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " need", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " use", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " get", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "_weather", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " function", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " here", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " function", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " requires", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " location", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " parameter", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " which", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " this", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " case", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "San", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " Francisco", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " CA", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "\".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " should", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " make", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " sure", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " include", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " both", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " city", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " state", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " as", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " specified", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " Let", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " me", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " check", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " if", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " there", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "'s", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " any", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " other", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " information", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " needed", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " but", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " user", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " just", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " wants", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " current", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " So", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " tool", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " call", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " should", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " be", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " straightforward", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "'ll", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " format", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " JSON", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " correctly", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " within", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " tool", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "_call", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": " tags", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": ".\n", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "\n\n", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": "chatcmpl-tool-33d90102b2fe4386808056bc3fa9ad17", + "function": { + "arguments": null, + "name": "get_weather" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"location\": \"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "San", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": " Francisco", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": ",", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": " CA", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0248ff8a1be5", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/452805c3c85951c86e4e5dfeef078a2e184866dafee83186cd84932daae1af42.json b/tests/integration/inference/recordings/452805c3c85951c86e4e5dfeef078a2e184866dafee83186cd84932daae1af42.json new file mode 100644 index 000000000..bbb81ab62 --- /dev/null +++ b/tests/integration/inference/recordings/452805c3c85951c86e4e5dfeef078a2e184866dafee83186cd84932daae1af42.json @@ -0,0 +1,84 @@ +{ + "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestEdgeCases::test_tool_without_schema[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "user", + "content": "Call the no args tool" + } + ], + "max_tokens": 4096, + "tools": [ + { + "type": "function", + "function": { + "name": "no_args_tool", + "description": "Tool with no arguments", + "parameters": { + "type": "object", + "properties": {} + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-452805c3c859", + "choices": [ + { + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, the user wants me to call the no args tool. Let me check the available functions. There's only one tool provided, which is the no_args_tool with no arguments. Since the user didn't specify any parameters, I should just return the tool call as instructed. I need to make sure the JSON is correctly formatted and within the XML tags. Alright, that's all I need.\n\n\n", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [ + { + "id": "chatcmpl-tool-7a67269afe214c85924c5171612bbdbd", + "function": { + "arguments": "{}", + "name": "no_args_tool" + }, + "type": "function" + } + ], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 101, + "prompt_tokens": 136, + "total_tokens": 237, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/496035259763c1bddb1a3148c2586663d08a5bc31f697d1fc5d9bed1c71f5950.json b/tests/integration/inference/recordings/496035259763c1bddb1a3148c2586663d08a5bc31f697d1fc5d9bed1c71f5950.json new file mode 100644 index 000000000..04c59b0b7 --- /dev/null +++ b/tests/integration/inference/recordings/496035259763c1bddb1a3148c2586663d08a5bc31f697d1fc5d9bed1c71f5950.json @@ -0,0 +1,92 @@ +{ + "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestOpenAICompatibility::test_openai_chat_completion_with_tools[openai_client-txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "user", + "content": "What's the weather in Tokyo?" + } + ], + "max_tokens": 4096, + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather information", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City name" + } + }, + "required": [ + "location" + ] + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-496035259763", + "choices": [ + { + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, the user is asking about the weather in Tokyo. I need to use the get_weather function for that. The function requires the location parameter, which in this case is Tokyo. I should make sure to specify \"Tokyo\" as the location. Let me check if there are any other parameters needed, but no, the function only needs the location. So the tool call should be straightforward. I'll format the JSON correctly inside the tool_call tags.\n\n\n", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [ + { + "id": "chatcmpl-tool-959b557fa67e4134a2391f5d35e5d5ae", + "function": { + "arguments": "{\"location\": \"Tokyo\"}", + "name": "get_weather" + }, + "type": "function" + } + ], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 117, + "prompt_tokens": 158, + "total_tokens": 275, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/524ead18daaddb6228284820adaa3fb312d2a525cc35e20c181190ddf40793e6.json b/tests/integration/inference/recordings/524ead18daaddb6228284820adaa3fb312d2a525cc35e20c181190ddf40793e6.json new file mode 100644 index 000000000..b33363e45 --- /dev/null +++ b/tests/integration/inference/recordings/524ead18daaddb6228284820adaa3fb312d2a525cc35e20c181190ddf40793e6.json @@ -0,0 +1,92 @@ +{ + "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestOpenAICompatibility::test_openai_format_preserves_complex_schemas[openai_client-txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "user", + "content": "Process this data" + } + ], + "max_tokens": 4096, + "tools": [ + { + "type": "function", + "function": { + "name": "process_data", + "description": "Process structured data", + "parameters": { + "type": "object", + "properties": { + "data": { + "$ref": "#/$defs/DataObject" + } + }, + "$defs": { + "DataObject": { + "type": "object", + "properties": { + "values": { + "type": "array", + "items": { + "type": "number" + } + } + } + } + } + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-524ead18daad", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, the user wants me to process the data. Let me check the available tools. There's a function called process_data that takes an object with a 'data' parameter. The data is an array of numbers. But the user hasn't provided any specific data yet. They just said \"Process this data.\" Hmm, maybe they expect me to prompt them for the data first. Wait, maybe there's a misunderstanding. Did they include the data in the conversation history? Let me look back. The user's message is \"Process this data.\" No data provided. Oh, maybe they made a mistake and forgot to include it. I need to ask them to provide the data so I can proceed. Let me confirm if there's any data mentioned. No, the current input is just the instruction. So I should ask the user to supply the data array of numbers to process.\n\n\nPlease provide the structured data you'd like me to process. For example, an array of numbers like `[1, 2, 3]`.", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 212, + "prompt_tokens": 180, + "total_tokens": 392, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/65eba1be095a7037c4f197f4168b310ebc8afc00aba3946ba498abe2fdbe6a63.json b/tests/integration/inference/recordings/65eba1be095a7037c4f197f4168b310ebc8afc00aba3946ba498abe2fdbe6a63.json new file mode 100644 index 000000000..a20b23fa2 --- /dev/null +++ b/tests/integration/inference/recordings/65eba1be095a7037c4f197f4168b310ebc8afc00aba3946ba498abe2fdbe6a63.json @@ -0,0 +1,2113 @@ +{ + "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestStreamingWithTools::test_streaming_tool_calls[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "user", + "content": "What time is it in UTC?" + } + ], + "max_tokens": 4096, + "stream": true, + "tools": [ + { + "type": "function", + "function": { + "name": "get_time", + "description": "Get current time", + "parameters": { + "type": "object", + "properties": { + "timezone": { + "type": "string" + } + } + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "\n", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "Okay", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " user", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " asking", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " current", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " time", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " UTC", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " Let", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " me", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " check", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " tools", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " available", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " There", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "'s", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " function", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " called", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " get", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "_time", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " takes", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " timezone", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " parameter", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " Since", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " UTC", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " standard", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " time", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " zone", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " need", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " specify", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " So", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "'ll", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " call", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " get", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "_time", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " timezone", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " set", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "UTC", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "\".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " That", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " should", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " retrieve", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " time", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": " correctly", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": ".\n", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "\n\n", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": "chatcmpl-tool-41faa6bedd074d51a6335cd2447deeab", + "function": { + "arguments": null, + "name": "get_time" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"timezone\": \"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "UTC", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-65eba1be095a", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": null, + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/744052775cf90e30dac587e6b809d41a8cc37adc29c500eecee2727f428cbf5a.json b/tests/integration/inference/recordings/744052775cf90e30dac587e6b809d41a8cc37adc29c500eecee2727f428cbf5a.json new file mode 100644 index 000000000..539668be7 --- /dev/null +++ b/tests/integration/inference/recordings/744052775cf90e30dac587e6b809d41a8cc37adc29c500eecee2727f428cbf5a.json @@ -0,0 +1,98 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_with_tools[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:tool_calling]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "system", + "content": "Pretend you are a weather assistant." + }, + { + "role": "user", + "content": "What's the weather like in San Francisco, CA?" + } + ], + "max_tokens": 4096, + "stream": false, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state (both required), e.g. San Francisco, CA." + } + }, + "required": [ + "location" + ] + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-744052775cf9", + "choices": [ + { + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, the user is asking about the weather in San Francisco, CA. I need to use the get_weather function. The function requires the location parameter, which is provided as San Francisco, CA. I should make sure to format the arguments correctly as a JSON object. Let me check the required parameters again. The location is required, so I can't omit it. I'll structure the tool call with the name \"get_weather\" and the arguments including \"location\": \"San Francisco, CA\". That should get the current weather information for the user.\n\n\n", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [ + { + "id": "chatcmpl-tool-b59dc311dd914d3dbd6d455b122bc39c", + "function": { + "arguments": "{\"location\": \"San Francisco, CA\"}", + "name": "get_weather" + }, + "type": "function" + } + ], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 138, + "prompt_tokens": 185, + "total_tokens": 323, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/77cf218283607bfac37623e1bb4e6f33cae670df7d6995d432bca34c5dfb0e43.json b/tests/integration/inference/recordings/77cf218283607bfac37623e1bb4e6f33cae670df7d6995d432bca34c5dfb0e43.json new file mode 100644 index 000000000..05b4e2609 --- /dev/null +++ b/tests/integration/inference/recordings/77cf218283607bfac37623e1bb4e6f33cae670df7d6995d432bca34c5dfb0e43.json @@ -0,0 +1,67 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_with_tool_choice_none[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:tool_calling]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "system", + "content": "Pretend you are a weather assistant." + }, + { + "role": "user", + "content": "What's the weather like in San Francisco, CA?" + } + ], + "max_tokens": 4096, + "stream": false + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-77cf21828360", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, the user is asking about the weather in San Francisco, CA. I need to check the current weather conditions. But wait, I can't access real-time data. I should mention that I can't provide the current weather forecast and ask them to check a reliable source like the National Weather Service or a weather app. Also, maybe suggest they can provide more details if they need help with something else related to the weather.\n\n\nI'm sorry, but I can't provide real-time weather information. However, you can check the current weather for San Francisco, CA using the National Weather Service (NWS) website, weather apps like Weather.com, or local meteorological services. Let me know if there's anything else I can assist with!", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 154, + "prompt_tokens": 33, + "total_tokens": 187, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/853f6a700b98d71d390b7d366e27133a22772fbdf11863158349c1b0625bbc72.json b/tests/integration/inference/recordings/853f6a700b98d71d390b7d366e27133a22772fbdf11863158349c1b0625bbc72.json new file mode 100644 index 000000000..2f6e4d3fc --- /dev/null +++ b/tests/integration/inference/recordings/853f6a700b98d71d390b7d366e27133a22772fbdf11863158349c1b0625bbc72.json @@ -0,0 +1,128 @@ +{ + "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestEdgeCases::test_multiple_tools_with_different_schemas[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "user", + "content": "Use one of the available tools" + } + ], + "max_tokens": 4096, + "tools": [ + { + "type": "function", + "function": { + "name": "simple", + "parameters": { + "type": "object", + "properties": { + "x": { + "type": "string" + } + } + } + } + }, + { + "type": "function", + "function": { + "name": "complex", + "parameters": { + "type": "object", + "properties": { + "data": { + "$ref": "#/$defs/Complex" + } + }, + "$defs": { + "Complex": { + "type": "object", + "properties": { + "nested": { + "type": "array", + "items": { + "type": "number" + } + } + } + } + } + } + } + }, + { + "type": "function", + "function": { + "name": "with_output", + "parameters": { + "type": "object", + "properties": { + "input": { + "type": "string" + } + } + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-853f6a700b98", + "choices": [ + { + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, let's see. The user wants me to use one of the available tools. The tools provided are simple, complex, and with_output. The simple function takes an argument 'x' of type string. The complex function requires a 'data' parameter that's an object with a nested array of numbers. The with_output function takes an input string.\n\nThe user's query is about using a tool, but there's no specific function name mentioned. Wait, maybe the user expects me to choose one based on the context. Since the tools are available, but the query is a general instruction, perhaps I should ask for clarification. However, the instructions say to use one of the tools if possible. Since the user hasn't specified a particular function, maybe I should check if there's any implicit function needed. But looking at the tools, none are directly related to the query. The user might need to specify which tool to use. Alternatively, maybe the answer is to call the simple function with an example input. But without more context, it's hard to tell. Wait, maybe the user expects me to choose the simplest one. Let's go with the simple function first. So the tool call would be to the simple function with x set to some value. But the user hasn't provided a specific value. Maybe I should state that the tool requires a value. But according to the instructions, if possible, use one of the tools. Since the user hasn't given a value, perhaps the answer is to call the simple function with an example. But the parameters for the simple function require 'x' which is a string. Maybe the user expects me to proceed without needing more info. So I'll proceed by calling the simple function with x as \"example\".\n\n\n", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [ + { + "id": "chatcmpl-tool-12e2ba0189cf484bb936cbb254a5c32a", + "function": { + "arguments": "{\"x\": \"example\"}", + "name": "simple" + }, + "type": "function" + } + ], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 378, + "prompt_tokens": 265, + "total_tokens": 643, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/cfb292c0f41dbc4a2c0fb39016760f6c23c248a7bbffea19ac6bcab7bf25292d.json b/tests/integration/inference/recordings/cfb292c0f41dbc4a2c0fb39016760f6c23c248a7bbffea19ac6bcab7bf25292d.json new file mode 100644 index 000000000..fdec100bb --- /dev/null +++ b/tests/integration/inference/recordings/cfb292c0f41dbc4a2c0fb39016760f6c23c248a7bbffea19ac6bcab7bf25292d.json @@ -0,0 +1,114 @@ +{ + "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestChatCompletionWithTools::test_tool_with_complex_schema[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "user", + "content": "Book a flight from SFO to JFK for John Doe" + } + ], + "max_tokens": 4096, + "tools": [ + { + "type": "function", + "function": { + "name": "book_flight", + "description": "Book a flight", + "parameters": { + "type": "object", + "properties": { + "flight": { + "$ref": "#/$defs/FlightInfo" + }, + "passenger": { + "$ref": "#/$defs/Passenger" + } + }, + "required": [ + "flight", + "passenger" + ], + "$defs": { + "FlightInfo": { + "type": "object", + "properties": { + "from": { + "type": "string" + }, + "to": { + "type": "string" + }, + "date": { + "type": "string", + "format": "date" + } + } + }, + "Passenger": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "age": { + "type": "integer" + } + } + } + } + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-cfb292c0f41d", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, the user wants to book a flight from SFO to JFK for John Doe. Let me check the tools available. The provided function is book_flight, which requires flight information and a passenger. The parameters needed are flight (as a FlightInfo object) and passenger (with name and age). The user mentioned SFO to JFK, so the flight details are from and to. The passenger's name is John Doe, but the age isn't provided. Wait, the function parameters require the passenger's name and age, but the user only mentioned the name. Maybe the age is missing? But the user didn't specify it, so perhaps I should note that the age is required. However, the function's required parameters are flight and passenger, so even if age is missing, the function can't be called without it. So I need to include both flight info and passenger details. The user's message only gives the name and destination, not the flight details or age. Therefore, I need to ask for the flight details and the passenger's age. But the user hasn't provided those. So I can't proceed with the function call. Wait, but maybe the user expects me to assume some default? No, the function requires all parameters. Since the user hasn't provided flight details or age, I can't call the function. So the correct response is to prompt the user for those details.\n\n\nThe booking requires the flight details and passenger's age. Could you provide the flight number and John Doe's age?", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 310, + "prompt_tokens": 261, + "total_tokens": 571, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/df353403c7fb59ed88c52269261b3dd9b75f681f8bb5431b4f07006d6c08aa7c.json b/tests/integration/inference/recordings/df353403c7fb59ed88c52269261b3dd9b75f681f8bb5431b4f07006d6c08aa7c.json new file mode 100644 index 000000000..eb6eb8eb2 --- /dev/null +++ b/tests/integration/inference/recordings/df353403c7fb59ed88c52269261b3dd9b75f681f8bb5431b4f07006d6c08aa7c.json @@ -0,0 +1,96 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_structured_output[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:structured_output]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant. Michael Jordan was born in 1963. His first name is \"Michael\", He played basketball for the Chicago Bulls for 15 seasons and was drafted in 1984" + }, + { + "role": "user", + "content": "Please give me information about Michael Jordan." + } + ], + "max_tokens": 4096, + "response_format": { + "type": "json_schema", + "json_schema": { + "name": "AnswerFormat", + "schema": { + "properties": { + "first_name": { + "title": "First Name", + "type": "string" + }, + "last_name": { + "title": "Last Name", + "type": "string" + }, + "year_of_birth": { + "title": "Year Of Birth", + "type": "integer" + } + }, + "required": [ + "first_name", + "last_name", + "year_of_birth" + ], + "title": "AnswerFormat", + "type": "object" + } + } + }, + "stream": false + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-df353403c7fb", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "{\"first_name\": \"Michael\", \"last_name\": \"Jordan\", \"year_of_birth\": 1963}", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 28, + "prompt_tokens": 66, + "total_tokens": 94, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/e89112e7735fccc5ad9ebe6a96454953aed0ba2501cabfaa80b742c2bf371cbc.json b/tests/integration/inference/recordings/e89112e7735fccc5ad9ebe6a96454953aed0ba2501cabfaa80b742c2bf371cbc.json new file mode 100644 index 000000000..856684a55 --- /dev/null +++ b/tests/integration/inference/recordings/e89112e7735fccc5ad9ebe6a96454953aed0ba2501cabfaa80b742c2bf371cbc.json @@ -0,0 +1,92 @@ +{ + "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestChatCompletionWithTools::test_simple_tool_call[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "messages": [ + { + "role": "user", + "content": "What's the weather in San Francisco?" + } + ], + "max_tokens": 4096, + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City name" + } + }, + "required": [ + "location" + ] + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-e89112e7735f", + "choices": [ + { + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "message": { + "content": "\nOkay, the user is asking for the weather in San Francisco. I need to check if there's a function available for that. Looking at the tools provided, there's a function called get_weather that requires a location parameter. The description says it gets weather for a location, and the parameter is the city name. The user provided \"San Francisco\" as the location, so I should call the get_weather function with \"San Francisco\" as the argument. I don't see any other parameters needed here, so the tool call should be straightforward. Just make sure the city name is correctly formatted in JSON.\n\n\n", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [ + { + "id": "chatcmpl-tool-feead29842dc40b2831c41ed397f555f", + "function": { + "arguments": "{\"location\": \"San Francisco\"}", + "name": "get_weather" + }, + "type": "function" + } + ], + "reasoning_content": null + }, + "stop_reason": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": null, + "usage": { + "completion_tokens": 146, + "prompt_tokens": 161, + "total_tokens": 307, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/f02f1bfd75adaea87b91dedc59430b99015b5ed0e2bbf24418a31146ffcbca9b.json b/tests/integration/inference/recordings/f02f1bfd75adaea87b91dedc59430b99015b5ed0e2bbf24418a31146ffcbca9b.json new file mode 100644 index 000000000..28f7d8296 --- /dev/null +++ b/tests/integration/inference/recordings/f02f1bfd75adaea87b91dedc59430b99015b5ed0e2bbf24418a31146ffcbca9b.json @@ -0,0 +1,53 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/completions", + "headers": {}, + "body": { + "model": "Qwen/Qwen3-0.6B", + "prompt": "I am feeling really sad today.", + "stream": false, + "extra_body": { + "guided_choice": [ + "joy", + "sadness" + ] + } + }, + "endpoint": "/v1/completions", + "model": "Qwen/Qwen3-0.6B" + }, + "response": { + "body": { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "rec-f02f1bfd75ad", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "text": "joy", + "stop_reason": null, + "prompt_logprobs": null + } + ], + "created": 0, + "model": "Qwen/Qwen3-0.6B", + "object": "text_completion", + "system_fingerprint": null, + "usage": { + "completion_tokens": 2, + "prompt_tokens": 7, + "total_tokens": 9, + "completion_tokens_details": null, + "prompt_tokens_details": null + }, + "kv_transfer_params": null + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-0037f2d2065a360cfcc36c35f138318cfc6508e743ff9423da4b7b1d7bfd4f3f-fb68f5a6.json b/tests/integration/inference/recordings/models-0037f2d2065a360cfcc36c35f138318cfc6508e743ff9423da4b7b1d7bfd4f3f-fb68f5a6.json new file mode 100644 index 000000000..7256ae75d --- /dev/null +++ b/tests/integration/inference/recordings/models-0037f2d2065a360cfcc36c35f138318cfc6508e743ff9423da4b7b1d7bfd4f3f-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375180, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-aeeb49e5e51c42fa94562780165bd620", + "object": "model_permission", + "created": 1762375180, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-009fb75503cf565d6c97f70deb8235432b0020b93d55e3b33ea093664c4bbc82-fb68f5a6.json b/tests/integration/inference/recordings/models-009fb75503cf565d6c97f70deb8235432b0020b93d55e3b33ea093664c4bbc82-fb68f5a6.json new file mode 100644 index 000000000..e0f5fa68f --- /dev/null +++ b/tests/integration/inference/recordings/models-009fb75503cf565d6c97f70deb8235432b0020b93d55e3b33ea093664c4bbc82-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375115, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-feec0a894be04f738e12b596ff163b64", + "object": "model_permission", + "created": 1762375115, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-01e6ee9852f532d9b0d82dde2e7c831d698e81dea1be69433050d42643f35edc-fb68f5a6.json b/tests/integration/inference/recordings/models-01e6ee9852f532d9b0d82dde2e7c831d698e81dea1be69433050d42643f35edc-fb68f5a6.json new file mode 100644 index 000000000..0eb0a26a6 --- /dev/null +++ b/tests/integration/inference/recordings/models-01e6ee9852f532d9b0d82dde2e7c831d698e81dea1be69433050d42643f35edc-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_stop_sequence[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:stop_sequence]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374330, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-119e17052e4c4c13bd791af3138d5360", + "object": "model_permission", + "created": 1762374330, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-10370bf5307b2fc971b8e53bdcc4e9eb4d3d76fe8ecdb31231b59576a612e972-fb68f5a6.json b/tests/integration/inference/recordings/models-10370bf5307b2fc971b8e53bdcc4e9eb4d3d76fe8ecdb31231b59576a612e972-fb68f5a6.json new file mode 100644 index 000000000..dc7c97f4f --- /dev/null +++ b/tests/integration/inference/recordings/models-10370bf5307b2fc971b8e53bdcc4e9eb4d3d76fe8ecdb31231b59576a612e972-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375226, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-c6ae673fda084519b3c67947896cd3b0", + "object": "model_permission", + "created": 1762375226, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-1312e0d8579e9b0e6dcb222272de34115277db71c6c560872fa13722197f881f-fb68f5a6.json b/tests/integration/inference/recordings/models-1312e0d8579e9b0e6dcb222272de34115277db71c6c560872fa13722197f881f-fb68f5a6.json new file mode 100644 index 000000000..833003741 --- /dev/null +++ b/tests/integration/inference/recordings/models-1312e0d8579e9b0e6dcb222272de34115277db71c6c560872fa13722197f881f-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374573, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-3f422354a81e491b87f93d5b192a0e1a", + "object": "model_permission", + "created": 1762374573, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-134e731d073e9e07eb9782bbe292167f8ad08157a15150ce92135854d04050fc-fb68f5a6.json b/tests/integration/inference/recordings/models-134e731d073e9e07eb9782bbe292167f8ad08157a15150ce92135854d04050fc-fb68f5a6.json new file mode 100644 index 000000000..df660a0f1 --- /dev/null +++ b/tests/integration/inference/recordings/models-134e731d073e9e07eb9782bbe292167f8ad08157a15150ce92135854d04050fc-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374305, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-794e16e59ddb4216a8bedfdf485b8f24", + "object": "model_permission", + "created": 1762374305, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-14c0905df1b177d2f85b30b0285b0ffdc88d1a7b290e2155fb7a01f3c1436ca0-fb68f5a6.json b/tests/integration/inference/recordings/models-14c0905df1b177d2f85b30b0285b0ffdc88d1a7b290e2155fb7a01f3c1436ca0-fb68f5a6.json new file mode 100644 index 000000000..fed71ffa7 --- /dev/null +++ b/tests/integration/inference/recordings/models-14c0905df1b177d2f85b30b0285b0ffdc88d1a7b290e2155fb7a01f3c1436ca0-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=vllm/Qwen/Qwen3-0.6B-True]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374317, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-ff7d26d076eb4373a0631a80fe3ae063", + "object": "model_permission", + "created": 1762374317, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-1bc879637162ba23badeea66c4c25a638869a3e90d16ef3e84dea1a613e7192e-fb68f5a6.json b/tests/integration/inference/recordings/models-1bc879637162ba23badeea66c4c25a638869a3e90d16ef3e84dea1a613e7192e-fb68f5a6.json new file mode 100644 index 000000000..9a532d386 --- /dev/null +++ b/tests/integration/inference/recordings/models-1bc879637162ba23badeea66c4c25a638869a3e90d16ef3e84dea1a613e7192e-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375033, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-2a16fede981b43be9e1cbe3dbedd1e74", + "object": "model_permission", + "created": 1762375033, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-2b9bac5da1a03c0b572bc019cc0c50904d49e6193990ca245908f4535bcaab43-fb68f5a6.json b/tests/integration/inference/recordings/models-2b9bac5da1a03c0b572bc019cc0c50904d49e6193990ca245908f4535bcaab43-fb68f5a6.json new file mode 100644 index 000000000..ab3269b57 --- /dev/null +++ b/tests/integration/inference/recordings/models-2b9bac5da1a03c0b572bc019cc0c50904d49e6193990ca245908f4535bcaab43-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374297, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-4bc93704559a4e1d8492aeec7222040c", + "object": "model_permission", + "created": 1762374297, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-394c30370fe5b724c5fe1292984373b281d47b2ac0d49e8b598f13cf100b3ad8-fb68f5a6.json b/tests/integration/inference/recordings/models-394c30370fe5b724c5fe1292984373b281d47b2ac0d49e8b598f13cf100b3ad8-fb68f5a6.json new file mode 100644 index 000000000..8237cc20c --- /dev/null +++ b/tests/integration/inference/recordings/models-394c30370fe5b724c5fe1292984373b281d47b2ac0d49e8b598f13cf100b3ad8-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374532, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-e353aa079d5145c19953791ac99daeba", + "object": "model_permission", + "created": 1762374532, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-3f4208962fdb2be3e7057777fc93a149890bd1dfa8a92597e176f23658e86cd8-fb68f5a6.json b/tests/integration/inference/recordings/models-3f4208962fdb2be3e7057777fc93a149890bd1dfa8a92597e176f23658e86cd8-fb68f5a6.json new file mode 100644 index 000000000..14b37fb0a --- /dev/null +++ b/tests/integration/inference/recordings/models-3f4208962fdb2be3e7057777fc93a149890bd1dfa8a92597e176f23658e86cd8-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-True]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375260, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-10c27d1c9e324b18b65321b422e19af9", + "object": "model_permission", + "created": 1762375260, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-4a729b00af209ad60846d1904e5973ad081aa5f595de50f5ef1aae304cb67ef3-fb68f5a6.json b/tests/integration/inference/recordings/models-4a729b00af209ad60846d1904e5973ad081aa5f595de50f5ef1aae304cb67ef3-fb68f5a6.json new file mode 100644 index 000000000..4af25e17a --- /dev/null +++ b/tests/integration/inference/recordings/models-4a729b00af209ad60846d1904e5973ad081aa5f595de50f5ef1aae304cb67ef3-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375040, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-f01c211577294936958dd28046c89dba", + "object": "model_permission", + "created": 1762375040, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-4f62bcb9cdf74f4c2ed804038def162f18ad384182b0f174918607e9ed3c1515-fb68f5a6.json b/tests/integration/inference/recordings/models-4f62bcb9cdf74f4c2ed804038def162f18ad384182b0f174918607e9ed3c1515-fb68f5a6.json new file mode 100644 index 000000000..54b48967b --- /dev/null +++ b/tests/integration/inference/recordings/models-4f62bcb9cdf74f4c2ed804038def162f18ad384182b0f174918607e9ed3c1515-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_02]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375266, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-7166a6fcd331435eb2d0f0a6b23382ed", + "object": "model_permission", + "created": 1762375266, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-54b420cdb98a0149a618088f55746e26b7bf6e7c5ebf5fa07c13ec9e366521d3-fb68f5a6.json b/tests/integration/inference/recordings/models-54b420cdb98a0149a618088f55746e26b7bf6e7c5ebf5fa07c13ec9e366521d3-fb68f5a6.json new file mode 100644 index 000000000..30a33793b --- /dev/null +++ b/tests/integration/inference/recordings/models-54b420cdb98a0149a618088f55746e26b7bf6e7c5ebf5fa07c13ec9e366521d3-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374301, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-cd16b092c5a04e719ddf786f0c3e935e", + "object": "model_permission", + "created": 1762374301, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-613f5d11a8cda7126115f96650334fde0a0457a6b4a2605bc15eec9b50a6956c-fb68f5a6.json b/tests/integration/inference/recordings/models-613f5d11a8cda7126115f96650334fde0a0457a6b4a2605bc15eec9b50a6956c-fb68f5a6.json new file mode 100644 index 000000000..4193dce51 --- /dev/null +++ b/tests/integration/inference/recordings/models-613f5d11a8cda7126115f96650334fde0a0457a6b4a2605bc15eec9b50a6956c-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:sanity]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374295, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-9f71adbb206846bb9d0e12834e41551e", + "object": "model_permission", + "created": 1762374295, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-62a361f55d61a98ea0863e9acfb5ab5d540c5d19e791415ee476474f7f1ed90f-fb68f5a6.json b/tests/integration/inference/recordings/models-62a361f55d61a98ea0863e9acfb5ab5d540c5d19e791415ee476474f7f1ed90f-fb68f5a6.json new file mode 100644 index 000000000..d5916fff7 --- /dev/null +++ b/tests/integration/inference/recordings/models-62a361f55d61a98ea0863e9acfb5ab5d540c5d19e791415ee476474f7f1ed90f-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_logprobs_streaming[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:log_probs]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374342, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-a8b7b38c40584a03b4b346b6c181fb93", + "object": "model_permission", + "created": 1762374342, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-668fc72f70ac72d5c112fe79d86d5c790611456b3f0102832f27e6edd420ab54-fb68f5a6.json b/tests/integration/inference/recordings/models-668fc72f70ac72d5c112fe79d86d5c790611456b3f0102832f27e6edd420ab54-fb68f5a6.json new file mode 100644 index 000000000..1542aa0cf --- /dev/null +++ b/tests/integration/inference/recordings/models-668fc72f70ac72d5c112fe79d86d5c790611456b3f0102832f27e6edd420ab54-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_01]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375235, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-dd48560646f141298f5cc2ef3467e54b", + "object": "model_permission", + "created": 1762375235, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-702eee4572e9b17ff0b0fdd55b10021f7077f0afcba922d6a53db0b537542518-fb68f5a6.json b/tests/integration/inference/recordings/models-702eee4572e9b17ff0b0fdd55b10021f7077f0afcba922d6a53db0b537542518-fb68f5a6.json new file mode 100644 index 000000000..fed0e7785 --- /dev/null +++ b/tests/integration/inference/recordings/models-702eee4572e9b17ff0b0fdd55b10021f7077f0afcba922d6a53db0b537542518-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374500, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-0ba0c3a54dcb4e57bc0308fd54425933", + "object": "model_permission", + "created": 1762374500, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-723d37a5bceab199cff076a0dcc2d4ee7596b7c800f13c64f6a6ecdbf4ed2f3a-fb68f5a6.json b/tests/integration/inference/recordings/models-723d37a5bceab199cff076a0dcc2d4ee7596b7c800f13c64f6a6ecdbf4ed2f3a-fb68f5a6.json new file mode 100644 index 000000000..f53aadba5 --- /dev/null +++ b/tests/integration/inference/recordings/models-723d37a5bceab199cff076a0dcc2d4ee7596b7c800f13c64f6a6ecdbf4ed2f3a-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=vllm/Qwen/Qwen3-0.6B-True]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374311, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-e95a9ed7439245b5995add97fb50f765", + "object": "model_permission", + "created": 1762374311, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-768c497339830cf86ddd7843f33d0ed06b3bce3ef2ae9f854364b534ba8cafb7-fb68f5a6.json b/tests/integration/inference/recordings/models-768c497339830cf86ddd7843f33d0ed06b3bce3ef2ae9f854364b534ba8cafb7-fb68f5a6.json new file mode 100644 index 000000000..5f5d13e94 --- /dev/null +++ b/tests/integration/inference/recordings/models-768c497339830cf86ddd7843f33d0ed06b3bce3ef2ae9f854364b534ba8cafb7-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375099, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-6b5eba46536f43df902871dd257e1676", + "object": "model_permission", + "created": 1762375099, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-7b0f2493d699e58cdfe0a9dab38f4423771c8ebced2020b1e15cbb35470c1ca2-fb68f5a6.json b/tests/integration/inference/recordings/models-7b0f2493d699e58cdfe0a9dab38f4423771c8ebced2020b1e15cbb35470c1ca2-fb68f5a6.json new file mode 100644 index 000000000..69b91db75 --- /dev/null +++ b/tests/integration/inference/recordings/models-7b0f2493d699e58cdfe0a9dab38f4423771c8ebced2020b1e15cbb35470c1ca2-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375207, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-bbfbcf20cac146e0ae5e45ae6a42632d", + "object": "model_permission", + "created": 1762375207, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-7ed97509ff199eabe1380caa36b9e5934e9d04a9cafcfa2d21d20f6f85679ae4-fb68f5a6.json b/tests/integration/inference/recordings/models-7ed97509ff199eabe1380caa36b9e5934e9d04a9cafcfa2d21d20f6f85679ae4-fb68f5a6.json new file mode 100644 index 000000000..d6d7b81a0 --- /dev/null +++ b/tests/integration/inference/recordings/models-7ed97509ff199eabe1380caa36b9e5934e9d04a9cafcfa2d21d20f6f85679ae4-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375273, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-4935d35e00fd4acdbe78662f42342e77", + "object": "model_permission", + "created": 1762375273, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-805e6b510b1ab33505a1af85c0d2a766cd3415512212d80f6292ca0ef5c359e1-fb68f5a6.json b/tests/integration/inference/recordings/models-805e6b510b1ab33505a1af85c0d2a766cd3415512212d80f6292ca0ef5c359e1-fb68f5a6.json new file mode 100644 index 000000000..1e7cb92bf --- /dev/null +++ b/tests/integration/inference/recordings/models-805e6b510b1ab33505a1af85c0d2a766cd3415512212d80f6292ca0ef5c359e1-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374591, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-e19031997a1e44d99c8b5ae55725a887", + "object": "model_permission", + "created": 1762374591, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-860b7e8309e0761e20e845be75c0a28d759384a367f6308f2a921702318a5dba-fb68f5a6.json b/tests/integration/inference/recordings/models-860b7e8309e0761e20e845be75c0a28d759384a367f6308f2a921702318a5dba-fb68f5a6.json new file mode 100644 index 000000000..2e4cb4cb0 --- /dev/null +++ b/tests/integration/inference/recordings/models-860b7e8309e0761e20e845be75c0a28d759384a367f6308f2a921702318a5dba-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_02]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375027, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-ec44b40a73b04912a837001376b59cff", + "object": "model_permission", + "created": 1762375027, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-8903569d538f9836ac6251d90c4668d3057e8e0ced847a08fd7a6faedb5710c3-fb68f5a6.json b/tests/integration/inference/recordings/models-8903569d538f9836ac6251d90c4668d3057e8e0ced847a08fd7a6faedb5710c3-fb68f5a6.json new file mode 100644 index 000000000..4ab79796c --- /dev/null +++ b/tests/integration/inference/recordings/models-8903569d538f9836ac6251d90c4668d3057e8e0ced847a08fd7a6faedb5710c3-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374356, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-3203232f1dbd426aba98ef1593dd3c01", + "object": "model_permission", + "created": 1762374356, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-8aa8c593dd64639678c294146fd56804393856c6e85197e6317ebd88351be21d-fb68f5a6.json b/tests/integration/inference/recordings/models-8aa8c593dd64639678c294146fd56804393856c6e85197e6317ebd88351be21d-fb68f5a6.json new file mode 100644 index 000000000..f2c124b73 --- /dev/null +++ b/tests/integration/inference/recordings/models-8aa8c593dd64639678c294146fd56804393856c6e85197e6317ebd88351be21d-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375248, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-5efe67a621074e979edaaf8fcfee9a80", + "object": "model_permission", + "created": 1762375248, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-8fc4c7b563b9bd423b74dcb4683039248f41d86c02703bd2dce845d972c9ae6f-fb68f5a6.json b/tests/integration/inference/recordings/models-8fc4c7b563b9bd423b74dcb4683039248f41d86c02703bd2dce845d972c9ae6f-fb68f5a6.json new file mode 100644 index 000000000..0b5b6e4ed --- /dev/null +++ b/tests/integration/inference/recordings/models-8fc4c7b563b9bd423b74dcb4683039248f41d86c02703bd2dce845d972c9ae6f-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375135, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-5509cf924e5e4fc89091e4593f264258", + "object": "model_permission", + "created": 1762375135, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-99ae704b53e3e3150cac5cd579e446e6545a4ab6a63048ce00ee1fbe5fbf1b4e-fb68f5a6.json b/tests/integration/inference/recordings/models-99ae704b53e3e3150cac5cd579e446e6545a4ab6a63048ce00ee1fbe5fbf1b4e-fb68f5a6.json new file mode 100644 index 000000000..f8ded3ee4 --- /dev/null +++ b/tests/integration/inference/recordings/models-99ae704b53e3e3150cac5cd579e446e6545a4ab6a63048ce00ee1fbe5fbf1b4e-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_01]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374301, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-72ed55b56df1471b9f71c48bacf8b768", + "object": "model_permission", + "created": 1762374301, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-9a36a281899f0800f085473f5f0185b09a02022607b965ac08b4db2e9e7eabc9-fb68f5a6.json b/tests/integration/inference/recordings/models-9a36a281899f0800f085473f5f0185b09a02022607b965ac08b4db2e9e7eabc9-fb68f5a6.json new file mode 100644 index 000000000..32ecdf0b6 --- /dev/null +++ b/tests/integration/inference/recordings/models-9a36a281899f0800f085473f5f0185b09a02022607b965ac08b4db2e9e7eabc9-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming_suffix[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:suffix]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374295, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-abbbfbb49abc4312b2b2011d4d2ba19b", + "object": "model_permission", + "created": 1762374295, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-9beac41c66cbe8568bb72b5ba0f5608597ef8a14b42585c22b1e7c45526537c1-fb68f5a6.json b/tests/integration/inference/recordings/models-9beac41c66cbe8568bb72b5ba0f5608597ef8a14b42585c22b1e7c45526537c1-fb68f5a6.json new file mode 100644 index 000000000..454b7223f --- /dev/null +++ b/tests/integration/inference/recordings/models-9beac41c66cbe8568bb72b5ba0f5608597ef8a14b42585c22b1e7c45526537c1-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375065, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-d943fbda14264715906334300853cec7", + "object": "model_permission", + "created": 1762375065, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-a495ae010d48bb3649c822e3299e819c164c2311db231c81296ff4c72e6f81cb-fb68f5a6.json b/tests/integration/inference/recordings/models-a495ae010d48bb3649c822e3299e819c164c2311db231c81296ff4c72e6f81cb-fb68f5a6.json new file mode 100644 index 000000000..1eded64dd --- /dev/null +++ b/tests/integration/inference/recordings/models-a495ae010d48bb3649c822e3299e819c164c2311db231c81296ff4c72e6f81cb-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming_with_file[txt=vllm/Qwen/Qwen3-0.6B]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374323, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-15a0a1106fff4fdd8ce7574373fe3cee", + "object": "model_permission", + "created": 1762374323, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-a77b3cb7370fd9f46e6ea12d72e1d9a8e7515f745289e93e5eb4a21d0e7b71b7-fb68f5a6.json b/tests/integration/inference/recordings/models-a77b3cb7370fd9f46e6ea12d72e1d9a8e7515f745289e93e5eb4a21d0e7b71b7-fb68f5a6.json new file mode 100644 index 000000000..9501f622a --- /dev/null +++ b/tests/integration/inference/recordings/models-a77b3cb7370fd9f46e6ea12d72e1d9a8e7515f745289e93e5eb4a21d0e7b71b7-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375082, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-ef1d3bc6fefc432380ef0eabdf216fd3", + "object": "model_permission", + "created": 1762375082, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-a82e913e058618dcb30b269a54d4e6a9cb1e0017a42efe04480874fe957194d4-fb68f5a6.json b/tests/integration/inference/recordings/models-a82e913e058618dcb30b269a54d4e6a9cb1e0017a42efe04480874fe957194d4-fb68f5a6.json new file mode 100644 index 000000000..6558ad3c4 --- /dev/null +++ b/tests/integration/inference/recordings/models-a82e913e058618dcb30b269a54d4e6a9cb1e0017a42efe04480874fe957194d4-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375165, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-b29f7386725b4f13976cd76b6dc3a278", + "object": "model_permission", + "created": 1762375165, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-bee98cb55c3b74854d0bb71b23b7e01bbb9f1580b413a26dc3afbf9da8b7d995-fb68f5a6.json b/tests/integration/inference/recordings/models-bee98cb55c3b74854d0bb71b23b7e01bbb9f1580b413a26dc3afbf9da8b7d995-fb68f5a6.json new file mode 100644 index 000000000..dc3944bb1 --- /dev/null +++ b/tests/integration/inference/recordings/models-bee98cb55c3b74854d0bb71b23b7e01bbb9f1580b413a26dc3afbf9da8b7d995-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_logprobs[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:log_probs]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374336, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-21db8cc1a31e41eaaa4e653435618645", + "object": "model_permission", + "created": 1762374336, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-c3d9f0302c09cecba4c3797ec2d65e358910e6194e13d1001fd3567ab2eff6aa-fb68f5a6.json b/tests/integration/inference/recordings/models-c3d9f0302c09cecba4c3797ec2d65e358910e6194e13d1001fd3567ab2eff6aa-fb68f5a6.json new file mode 100644 index 000000000..b81f6ed60 --- /dev/null +++ b/tests/integration/inference/recordings/models-c3d9f0302c09cecba4c3797ec2d65e358910e6194e13d1001fd3567ab2eff6aa-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374547, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-09fed2c5660e42658ab23c6d17b7840c", + "object": "model_permission", + "created": 1762374547, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-c6e251660301fe3f503b4c31dcb551087ca9118e65b97bd894954847723a9be0-fb68f5a6.json b/tests/integration/inference/recordings/models-c6e251660301fe3f503b4c31dcb551087ca9118e65b97bd894954847723a9be0-fb68f5a6.json new file mode 100644 index 000000000..03f377561 --- /dev/null +++ b/tests/integration/inference/recordings/models-c6e251660301fe3f503b4c31dcb551087ca9118e65b97bd894954847723a9be0-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_streaming[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:sanity]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374297, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-70d68a901d2445f6b7f470c600b34c78", + "object": "model_permission", + "created": 1762374297, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-cb1f7d5cd412fddb3395ef125bbcdac95c85585f23684e71abf142004b164bbc-fb68f5a6.json b/tests/integration/inference/recordings/models-cb1f7d5cd412fddb3395ef125bbcdac95c85585f23684e71abf142004b164bbc-fb68f5a6.json new file mode 100644 index 000000000..2d1759b41 --- /dev/null +++ b/tests/integration/inference/recordings/models-cb1f7d5cd412fddb3395ef125bbcdac95c85585f23684e71abf142004b164bbc-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=vllm/Qwen/Qwen3-0.6B-False]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375047, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-a8446fd6718649399402526dc6fe1477", + "object": "model_permission", + "created": 1762375047, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-cbecbec285766025f2bebca94904e63578190f33b47eb6f32cb4635a1b43e3cf-fb68f5a6.json b/tests/integration/inference/recordings/models-cbecbec285766025f2bebca94904e63578190f33b47eb6f32cb4635a1b43e3cf-fb68f5a6.json new file mode 100644 index 000000000..bac4b8cb4 --- /dev/null +++ b/tests/integration/inference/recordings/models-cbecbec285766025f2bebca94904e63578190f33b47eb6f32cb4635a1b43e3cf-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-True]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375254, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-f6eb51901e6443e492061deac904737c", + "object": "model_permission", + "created": 1762375254, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-d650458718dae3a10405ce1d241f0e1ceeeae8edf516cf10c611edcdf64035e3-fb68f5a6.json b/tests/integration/inference/recordings/models-d650458718dae3a10405ce1d241f0e1ceeeae8edf516cf10c611edcdf64035e3-fb68f5a6.json new file mode 100644 index 000000000..89aef16d5 --- /dev/null +++ b/tests/integration/inference/recordings/models-d650458718dae3a10405ce1d241f0e1ceeeae8edf516cf10c611edcdf64035e3-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375279, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-31e50ba39ad84a7daa1a24a3c77dc550", + "object": "model_permission", + "created": 1762375279, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-d8acc76e3d1b54eac9754a9d3a72c571fe3078b227a257aa15afdba946b69665-fb68f5a6.json b/tests/integration/inference/recordings/models-d8acc76e3d1b54eac9754a9d3a72c571fe3078b227a257aa15afdba946b69665-fb68f5a6.json new file mode 100644 index 000000000..a9b4bf369 --- /dev/null +++ b/tests/integration/inference/recordings/models-d8acc76e3d1b54eac9754a9d3a72c571fe3078b227a257aa15afdba946b69665-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375241, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-3ebcc379347541ea94de0f91838829e5", + "object": "model_permission", + "created": 1762375241, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-d9ff5f5ffaa7a64101936007fbe61cf2ed54f67609b54b56d92cb949234e3799-fb68f5a6.json b/tests/integration/inference/recordings/models-d9ff5f5ffaa7a64101936007fbe61cf2ed54f67609b54b56d92cb949234e3799-fb68f5a6.json new file mode 100644 index 000000000..4bd1dde93 --- /dev/null +++ b/tests/integration/inference/recordings/models-d9ff5f5ffaa7a64101936007fbe61cf2ed54f67609b54b56d92cb949234e3799-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374449, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-311f880045284a469a286b8039177d10", + "object": "model_permission", + "created": 1762374449, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-da380037dc0fe8ae61b838baf268e616057e46f8424df0a9b52f94e48cef4a7f-fb68f5a6.json b/tests/integration/inference/recordings/models-da380037dc0fe8ae61b838baf268e616057e46f8424df0a9b52f94e48cef4a7f-fb68f5a6.json new file mode 100644 index 000000000..fa5bddb15 --- /dev/null +++ b/tests/integration/inference/recordings/models-da380037dc0fe8ae61b838baf268e616057e46f8424df0a9b52f94e48cef4a7f-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=vllm/Qwen/Qwen3-0.6B-False]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375053, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-2e52800baf7e4d3389892f33feb3f52b", + "object": "model_permission", + "created": 1762375053, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-e42ca9261e3cee9c877322a51791ab6f113478170f8a21cd0a971c53b330e999-fb68f5a6.json b/tests/integration/inference/recordings/models-e42ca9261e3cee9c877322a51791ab6f113478170f8a21cd0a971c53b330e999-fb68f5a6.json new file mode 100644 index 000000000..5cdfadeb1 --- /dev/null +++ b/tests/integration/inference/recordings/models-e42ca9261e3cee9c877322a51791ab6f113478170f8a21cd0a971c53b330e999-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375150, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-cfec81fed838407597a92838017f3ef5", + "object": "model_permission", + "created": 1762375150, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-e5255919e39635597ad57c723896f9d258abaad9908b22ccd03c126ce597a5db-fb68f5a6.json b/tests/integration/inference/recordings/models-e5255919e39635597ad57c723896f9d258abaad9908b22ccd03c126ce597a5db-fb68f5a6.json new file mode 100644 index 000000000..82c9665de --- /dev/null +++ b/tests/integration/inference/recordings/models-e5255919e39635597ad57c723896f9d258abaad9908b22ccd03c126ce597a5db-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374466, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-8811d359d9724f8cac7fd6df608f69bd", + "object": "model_permission", + "created": 1762374466, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-e6664ff0c07b13aa2af6a85925f3841eef3907bc4a55f8bc352a8c960e782ada-fb68f5a6.json b/tests/integration/inference/recordings/models-e6664ff0c07b13aa2af6a85925f3841eef3907bc4a55f8bc352a8c960e782ada-fb68f5a6.json new file mode 100644 index 000000000..f83a6dea7 --- /dev/null +++ b/tests/integration/inference/recordings/models-e6664ff0c07b13aa2af6a85925f3841eef3907bc4a55f8bc352a8c960e782ada-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374482, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-d4b4923adfdf40b7bd7698aa798e68eb", + "object": "model_permission", + "created": 1762374482, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-edbd3344609a0fa1e97f75ede14a094a34db0dd6cb52975abae9f6e7832c6760-fb68f5a6.json b/tests/integration/inference/recordings/models-edbd3344609a0fa1e97f75ede14a094a34db0dd6cb52975abae9f6e7832c6760-fb68f5a6.json new file mode 100644 index 000000000..9084b4b59 --- /dev/null +++ b/tests/integration/inference/recordings/models-edbd3344609a0fa1e97f75ede14a094a34db0dd6cb52975abae9f6e7832c6760-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-False]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375291, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-a48cfd65bcd847d7aea01d44e8add51e", + "object": "model_permission", + "created": 1762375291, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-f6a9f5d7181cf078717443564e4de54e08845224d96b9c8150fb5cfda2068e82-fb68f5a6.json b/tests/integration/inference/recordings/models-f6a9f5d7181cf078717443564e4de54e08845224d96b9c8150fb5cfda2068e82-fb68f5a6.json new file mode 100644 index 000000000..9791dd1f7 --- /dev/null +++ b/tests/integration/inference/recordings/models-f6a9f5d7181cf078717443564e4de54e08845224d96b9c8150fb5cfda2068e82-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762374517, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-916d53706b624fefb83e5dcc699e7a69", + "object": "model_permission", + "created": 1762374517, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/inference/recordings/models-f936269fe152d95db3fb80fb10482e3cc79cfd6a28ebdf1a7a8b220ba2de641b-fb68f5a6.json b/tests/integration/inference/recordings/models-f936269fe152d95db3fb80fb10482e3cc79cfd6a28ebdf1a7a8b220ba2de641b-fb68f5a6.json new file mode 100644 index 000000000..c561e0df0 --- /dev/null +++ b/tests/integration/inference/recordings/models-f936269fe152d95db3fb80fb10482e3cc79cfd6a28ebdf1a7a8b220ba2de641b-fb68f5a6.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-False]", + "request": { + "method": "POST", + "url": "http://localhost:8000/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "Qwen/Qwen3-0.6B", + "created": 1762375285, + "object": "model", + "owned_by": "vllm", + "root": "/root/.cache/Qwen3-0.6B", + "parent": null, + "max_model_len": 8192, + "permission": [ + { + "id": "modelperm-e0640be42b814b3394545ebe92d844b3", + "object": "model_permission", + "created": 1762375285, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/suites.py b/tests/integration/suites.py index e1fb6a1c7..0cec66afe 100644 --- a/tests/integration/suites.py +++ b/tests/integration/suites.py @@ -78,7 +78,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = { "VLLM_URL": "http://localhost:8000/v1", }, defaults={ - "text_model": "vllm/meta-llama/Llama-3.2-1B-Instruct", + "text_model": "vllm/Qwen/Qwen3-0.6B", "embedding_model": "sentence-transformers/nomic-embed-text-v1.5", }, ), @@ -169,6 +169,11 @@ SUITE_DEFINITIONS: dict[str, Suite] = { roots=base_roots, default_setup="ollama", ), + "base-vllm-subset": Suite( + name="base-vllm-subset", + roots=["tests/integration/inference"], + default_setup="vllm", + ), "responses": Suite( name="responses", roots=["tests/integration/responses"], diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 9da5dd25b..97ce4abe8 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -350,7 +350,7 @@ def test_openai_vector_store_search_empty( assert search_response is not None assert hasattr(search_response, "data") assert len(search_response.data) == 0 # Empty store should return no results - assert search_response.search_query == "test query" + assert search_response.search_query == ["test query"] assert search_response.has_more is False @@ -679,7 +679,7 @@ def test_openai_vector_store_attach_file( assert file_attach_response.id == file.id assert file_attach_response.vector_store_id == vector_store.id assert file_attach_response.status == "completed" - assert file_attach_response.chunking_strategy.type == "auto" + assert file_attach_response.chunking_strategy.type == "static" assert file_attach_response.created_at > 0 assert not file_attach_response.last_error @@ -815,8 +815,8 @@ def test_openai_vector_store_list_files( assert set(file_ids) == {file.id for file in files_list.data} assert files_list.data[0].object == "vector_store.file" assert files_list.data[0].vector_store_id == vector_store.id - assert files_list.data[0].status == "completed" - assert files_list.data[0].chunking_strategy.type == "auto" + assert files_list.data[0].status in ["completed", "in_progress"] + assert files_list.data[0].chunking_strategy.type == "static" assert files_list.data[0].created_at > 0 assert files_list.first_id == files_list.data[0].id assert not files_list.data[0].last_error @@ -825,7 +825,7 @@ def test_openai_vector_store_list_files( assert first_page.has_more assert len(first_page.data) == 2 assert first_page.first_id == first_page.data[0].id - assert first_page.last_id != first_page.data[-1].id + assert first_page.last_id == first_page.data[-1].id next_page = compat_client.vector_stores.files.list( vector_store_id=vector_store.id, limit=2, after=first_page.data[-1].id