Merge remote-tracking branch 'upstream/main' into feat/gunicorn-production-server

Roy Belio 2025-10-30 16:43:34 +02:00
commit b060f73e6d
70 changed files with 46290 additions and 1133 deletions


@@ -43,6 +43,9 @@ jobs:
           cache: 'npm'
           cache-dependency-path: 'src/llama_stack/ui/'
+      - name: Set up uv
+        uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
       - name: Install npm dependencies
         run: npm ci
         working-directory: src/llama_stack/ui
@@ -52,7 +55,7 @@ jobs:
         uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
         continue-on-error: true
         env:
-          SKIP: no-commit-to-branch
+          SKIP: no-commit-to-branch,mypy
           RUFF_OUTPUT_FORMAT: github
       - name: Check pre-commit results
@@ -109,3 +112,16 @@ jobs:
           echo "$unstaged_files"
           exit 1
         fi
+      - name: Sync dev + type_checking dependencies
+        run: uv sync --group dev --group type_checking
+      - name: Run mypy (full type_checking)
+        run: |
+          set +e
+          uv run --group dev --group type_checking mypy
+          status=$?
+          if [ $status -ne 0 ]; then
+            echo "::error::Full mypy failed. Reproduce locally with 'uv run pre-commit run mypy-full --hook-stage manual --all-files'."
+          fi
+          exit $status

.gitignore

@@ -32,3 +32,6 @@ CLAUDE.md
 docs/.docusaurus/
 docs/node_modules/
 docs/static/imported-files/
+docs/docs/api-deprecated/
+docs/docs/api-experimental/
+docs/docs/api/


@@ -57,17 +57,27 @@ repos:
     hooks:
       - id: uv-lock
-  - repo: local
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.18.2
     hooks:
       - id: mypy
-        name: mypy
         additional_dependencies:
-          - uv==0.7.8
-        entry: uv run --group dev --group type_checking mypy
-        language: python
-        types: [python]
+          - uv==0.6.2
+          - pytest
+          - rich
+          - types-requests
+          - pydantic
+          - httpx
         pass_filenames: false
-        require_serial: true
+  - repo: local
+    hooks:
+      - id: mypy-full
+        name: mypy (full type_checking)
+        entry: uv run --group dev --group type_checking mypy
+        language: system
+        pass_filenames: false
+        stages: [manual]
   # - repo: https://github.com/tcort/markdown-link-check
   #   rev: v3.11.2
@@ -152,7 +162,6 @@ repos:
         files: ^src/llama_stack/ui/.*\.(ts|tsx)$
         pass_filenames: false
         require_serial: true
       - id: check-log-usage
         name: Ensure 'llama_stack.log' usage for logging
         entry: bash
@@ -171,7 +180,23 @@ repos:
             exit 1
           fi
           exit 0
+      - id: fips-compliance
+        name: Ensure llama-stack remains FIPS compliant
+        entry: bash
+        language: system
+        types: [python]
+        pass_filenames: true
+        exclude: '^tests/.*$' # Exclude test dir as some safety tests used MD5
+        args:
+          - -c
+          - |
+            grep -EnH '^[^#]*\b(md5|sha1|uuid3|uuid5)\b' "$@" && {
+              echo;
+              echo "❌ Do not use any of the following functions: hashlib.md5, hashlib.sha1, uuid.uuid3, uuid.uuid5"
+              echo "   These functions are not FIPS-compliant"
+              echo;
+              exit 1;
+            } || true
 ci:
   autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
   autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate
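
For reference, a minimal sketch of replacements the new fips-compliance hook above would accept, assuming SHA-256 digests and random UUIDs are acceptable substitutes at the call sites:

```python
import hashlib
import uuid

# hashlib.md5 and hashlib.sha1 trip the hook; SHA-256 is FIPS-approved.
digest = hashlib.sha256(b"payload").hexdigest()

# uuid.uuid3 (MD5-based) and uuid.uuid5 (SHA-1-based) are banned;
# uuid.uuid4 is random and uses neither digest.
identifier = str(uuid.uuid4())

print(digest, identifier)
```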


@@ -61,6 +61,18 @@ uv run pre-commit run --all-files -v
 The `-v` (verbose) parameter is optional but often helpful for getting more information about any issues that the pre-commit checks identify.
+
+To run the expanded mypy configuration that CI enforces, use:
+
+```bash
+uv run pre-commit run mypy-full --hook-stage manual --all-files
+```
+
+or invoke mypy directly with all optional dependencies:
+
+```bash
+uv run --group dev --group type_checking mypy
+```
+
 ```{caution}
 Before pushing your changes, make sure that the pre-commit hooks have passed successfully.
 ```


@@ -1,610 +0,0 @@
# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json
organization:
# Name of your organization or company, used to determine the name of the client
# and headings.
name: llama-stack-client
docs: https://llama-stack.readthedocs.io/en/latest/
contact: llamastack@meta.com
security:
- {}
- BearerAuth: []
security_schemes:
BearerAuth:
type: http
scheme: bearer
# `targets` define the output targets and their customization options, such as
# whether to emit the Node SDK and what its package name should be.
targets:
node:
package_name: llama-stack-client
production_repo: llamastack/llama-stack-client-typescript
publish:
npm: false
python:
package_name: llama_stack_client
production_repo: llamastack/llama-stack-client-python
options:
use_uv: true
publish:
pypi: true
project_name: llama_stack_client
kotlin:
reverse_domain: com.llama_stack_client.api
production_repo: null
publish:
maven: false
go:
package_name: llama-stack-client
production_repo: llamastack/llama-stack-client-go
options:
enable_v2: true
back_compat_use_shared_package: false
# `client_settings` define settings for the API client, such as extra constructor
# arguments (used for authentication), retry behavior, idempotency, etc.
client_settings:
default_env_prefix: LLAMA_STACK_CLIENT
opts:
api_key:
type: string
read_env: LLAMA_STACK_CLIENT_API_KEY
auth: { security_scheme: BearerAuth }
nullable: true
# `environments` are a map of the name of the environment (e.g. "sandbox",
# "production") to the corresponding url to use.
environments:
production: http://any-hosted-llama-stack.com
# `pagination` defines [pagination schemes] which provides a template to match
# endpoints and generate next-page and auto-pagination helpers in the SDKs.
pagination:
- name: datasets_iterrows
type: offset
request:
dataset_id:
type: string
start_index:
type: integer
x-stainless-pagination-property:
purpose: offset_count_param
limit:
type: integer
response:
data:
type: array
items:
type: object
next_index:
type: integer
x-stainless-pagination-property:
purpose: offset_count_start_field
- name: openai_cursor_page
type: cursor
request:
limit:
type: integer
after:
type: string
x-stainless-pagination-property:
purpose: next_cursor_param
response:
data:
type: array
items: {}
has_more:
type: boolean
last_id:
type: string
x-stainless-pagination-property:
purpose: next_cursor_field
# `resources` define the structure and organization for your API, such as how
# methods and models are grouped together and accessed. See the [configuration
# guide] for more information.
#
# [configuration guide]:
# https://app.stainlessapi.com/docs/guides/configure#resources
resources:
$shared:
models:
agent_config: AgentConfig
interleaved_content_item: InterleavedContentItem
interleaved_content: InterleavedContent
param_type: ParamType
safety_violation: SafetyViolation
sampling_params: SamplingParams
scoring_result: ScoringResult
message: Message
user_message: UserMessage
completion_message: CompletionMessage
tool_response_message: ToolResponseMessage
system_message: SystemMessage
tool_call: ToolCall
query_result: RAGQueryResult
document: RAGDocument
query_config: RAGQueryConfig
response_format: ResponseFormat
toolgroups:
models:
tool_group: ToolGroup
list_tool_groups_response: ListToolGroupsResponse
methods:
register: post /v1/toolgroups
get: get /v1/toolgroups/{toolgroup_id}
list: get /v1/toolgroups
unregister: delete /v1/toolgroups/{toolgroup_id}
tools:
methods:
get: get /v1/tools/{tool_name}
list:
endpoint: get /v1/tools
paginated: false
tool_runtime:
models:
tool_def: ToolDef
tool_invocation_result: ToolInvocationResult
methods:
list_tools:
endpoint: get /v1/tool-runtime/list-tools
paginated: false
invoke_tool: post /v1/tool-runtime/invoke
subresources:
rag_tool:
methods:
insert: post /v1/tool-runtime/rag-tool/insert
query: post /v1/tool-runtime/rag-tool/query
responses:
models:
response_object_stream: OpenAIResponseObjectStream
response_object: OpenAIResponseObject
methods:
create:
type: http
endpoint: post /v1/responses
streaming:
stream_event_model: responses.response_object_stream
param_discriminator: stream
retrieve: get /v1/responses/{response_id}
list:
type: http
endpoint: get /v1/responses
delete:
type: http
endpoint: delete /v1/responses/{response_id}
subresources:
input_items:
methods:
list:
type: http
endpoint: get /v1/responses/{response_id}/input_items
conversations:
models:
conversation_object: Conversation
methods:
create:
type: http
endpoint: post /v1/conversations
retrieve: get /v1/conversations/{conversation_id}
update:
type: http
endpoint: post /v1/conversations/{conversation_id}
delete:
type: http
endpoint: delete /v1/conversations/{conversation_id}
subresources:
items:
methods:
get:
type: http
endpoint: get /v1/conversations/{conversation_id}/items/{item_id}
list:
type: http
endpoint: get /v1/conversations/{conversation_id}/items
create:
type: http
endpoint: post /v1/conversations/{conversation_id}/items
inspect:
models:
healthInfo: HealthInfo
providerInfo: ProviderInfo
routeInfo: RouteInfo
versionInfo: VersionInfo
methods:
health: get /v1/health
version: get /v1/version
embeddings:
models:
create_embeddings_response: OpenAIEmbeddingsResponse
methods:
create: post /v1/embeddings
chat:
models:
chat_completion_chunk: OpenAIChatCompletionChunk
subresources:
completions:
methods:
create:
type: http
endpoint: post /v1/chat/completions
streaming:
stream_event_model: chat.chat_completion_chunk
param_discriminator: stream
list:
type: http
endpoint: get /v1/chat/completions
retrieve:
type: http
endpoint: get /v1/chat/completions/{completion_id}
completions:
methods:
create:
type: http
endpoint: post /v1/completions
streaming:
param_discriminator: stream
vector_io:
models:
queryChunksResponse: QueryChunksResponse
methods:
insert: post /v1/vector-io/insert
query: post /v1/vector-io/query
vector_stores:
models:
vector_store: VectorStoreObject
list_vector_stores_response: VectorStoreListResponse
vector_store_delete_response: VectorStoreDeleteResponse
vector_store_search_response: VectorStoreSearchResponsePage
methods:
create: post /v1/vector_stores
list:
endpoint: get /v1/vector_stores
retrieve: get /v1/vector_stores/{vector_store_id}
update: post /v1/vector_stores/{vector_store_id}
delete: delete /v1/vector_stores/{vector_store_id}
search: post /v1/vector_stores/{vector_store_id}/search
subresources:
files:
models:
vector_store_file: VectorStoreFileObject
methods:
list: get /v1/vector_stores/{vector_store_id}/files
retrieve: get /v1/vector_stores/{vector_store_id}/files/{file_id}
update: post /v1/vector_stores/{vector_store_id}/files/{file_id}
delete: delete /v1/vector_stores/{vector_store_id}/files/{file_id}
create: post /v1/vector_stores/{vector_store_id}/files
content: get /v1/vector_stores/{vector_store_id}/files/{file_id}/content
file_batches:
models:
vector_store_file_batches: VectorStoreFileBatchObject
list_vector_store_files_in_batch_response: VectorStoreFilesListInBatchResponse
methods:
create: post /v1/vector_stores/{vector_store_id}/file_batches
retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}
list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files
cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel
models:
models:
model: Model
list_models_response: ListModelsResponse
methods:
retrieve: get /v1/models/{model_id}
list:
endpoint: get /v1/models
paginated: false
register: post /v1/models
unregister: delete /v1/models/{model_id}
subresources:
openai:
methods:
list:
endpoint: get /v1/models
paginated: false
providers:
models:
list_providers_response: ListProvidersResponse
methods:
list:
endpoint: get /v1/providers
paginated: false
retrieve: get /v1/providers/{provider_id}
routes:
models:
list_routes_response: ListRoutesResponse
methods:
list:
endpoint: get /v1/inspect/routes
paginated: false
moderations:
models:
create_response: ModerationObject
methods:
create: post /v1/moderations
safety:
models:
run_shield_response: RunShieldResponse
methods:
run_shield: post /v1/safety/run-shield
shields:
models:
shield: Shield
list_shields_response: ListShieldsResponse
methods:
retrieve: get /v1/shields/{identifier}
list:
endpoint: get /v1/shields
paginated: false
register: post /v1/shields
delete: delete /v1/shields/{identifier}
synthetic_data_generation:
models:
syntheticDataGenerationResponse: SyntheticDataGenerationResponse
methods:
generate: post /v1/synthetic-data-generation/generate
telemetry:
models:
span_with_status: SpanWithStatus
trace: Trace
query_spans_response: QuerySpansResponse
event: Event
query_condition: QueryCondition
methods:
query_traces:
endpoint: post /v1alpha/telemetry/traces
skip_test_reason: 'unsupported query params in java / kotlin'
get_span_tree: post /v1alpha/telemetry/spans/{span_id}/tree
query_spans:
endpoint: post /v1alpha/telemetry/spans
skip_test_reason: 'unsupported query params in java / kotlin'
query_metrics:
endpoint: post /v1alpha/telemetry/metrics/{metric_name}
skip_test_reason: 'unsupported query params in java / kotlin'
# log_event: post /v1alpha/telemetry/events
save_spans_to_dataset: post /v1alpha/telemetry/spans/export
get_span: get /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}
get_trace: get /v1alpha/telemetry/traces/{trace_id}
scoring:
methods:
score: post /v1/scoring/score
score_batch: post /v1/scoring/score-batch
scoring_functions:
methods:
retrieve: get /v1/scoring-functions/{scoring_fn_id}
list:
endpoint: get /v1/scoring-functions
paginated: false
register: post /v1/scoring-functions
models:
scoring_fn: ScoringFn
scoring_fn_params: ScoringFnParams
list_scoring_functions_response: ListScoringFunctionsResponse
benchmarks:
methods:
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
list:
endpoint: get /v1alpha/eval/benchmarks
paginated: false
register: post /v1alpha/eval/benchmarks
models:
benchmark: Benchmark
list_benchmarks_response: ListBenchmarksResponse
files:
methods:
create: post /v1/files
list: get /v1/files
retrieve: get /v1/files/{file_id}
delete: delete /v1/files/{file_id}
content: get /v1/files/{file_id}/content
models:
file: OpenAIFileObject
list_files_response: ListOpenAIFileResponse
delete_file_response: OpenAIFileDeleteResponse
alpha:
subresources:
inference:
methods:
rerank: post /v1alpha/inference/rerank
post_training:
models:
algorithm_config: AlgorithmConfig
post_training_job: PostTrainingJob
list_post_training_jobs_response: ListPostTrainingJobsResponse
methods:
preference_optimize: post /v1alpha/post-training/preference-optimize
supervised_fine_tune: post /v1alpha/post-training/supervised-fine-tune
subresources:
job:
methods:
artifacts: get /v1alpha/post-training/job/artifacts
cancel: post /v1alpha/post-training/job/cancel
status: get /v1alpha/post-training/job/status
list:
endpoint: get /v1alpha/post-training/jobs
paginated: false
eval:
methods:
evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
subresources:
jobs:
methods:
cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
models:
evaluate_response: EvaluateResponse
benchmark_config: BenchmarkConfig
job: Job
agents:
methods:
create: post /v1alpha/agents
list: get /v1alpha/agents
retrieve: get /v1alpha/agents/{agent_id}
delete: delete /v1alpha/agents/{agent_id}
models:
inference_step: InferenceStep
tool_execution_step: ToolExecutionStep
tool_response: ToolResponse
shield_call_step: ShieldCallStep
memory_retrieval_step: MemoryRetrievalStep
subresources:
session:
models:
session: Session
methods:
list: get /v1alpha/agents/{agent_id}/sessions
create: post /v1alpha/agents/{agent_id}/session
delete: delete /v1alpha/agents/{agent_id}/session/{session_id}
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}
steps:
methods:
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}
turn:
models:
turn: Turn
turn_response_event: AgentTurnResponseEvent
agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk
methods:
create:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}
resume:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
beta:
subresources:
datasets:
models:
list_datasets_response: ListDatasetsResponse
methods:
register: post /v1beta/datasets
retrieve: get /v1beta/datasets/{dataset_id}
list:
endpoint: get /v1beta/datasets
paginated: false
unregister: delete /v1beta/datasets/{dataset_id}
iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
settings:
license: MIT
unwrap_response_fields: [ data ]
openapi:
transformations:
- command: renameValue
reason: pydantic reserved name
args:
filter:
only:
- '$.components.schemas.InferenceStep.properties.model_response'
rename:
python:
property_name: 'inference_model_response'
# - command: renameValue
# reason: pydantic reserved name
# args:
# filter:
# only:
# - '$.components.schemas.Model.properties.model_type'
# rename:
# python:
# property_name: 'type'
- command: mergeObject
reason: Better return_type using enum
args:
target:
- '$.components.schemas'
object:
ReturnType:
additionalProperties: false
properties:
type:
enum:
- string
- number
- boolean
- array
- object
- json
- union
- chat_completion_input
- completion_input
- agent_turn_input
required:
- type
type: object
- command: replaceProperties
reason: Replace return type properties with better model (see above)
args:
filter:
only:
- '$.components.schemas.ScoringFn.properties.return_type'
- '$.components.schemas.RegisterScoringFunctionRequest.properties.return_type'
value:
$ref: '#/components/schemas/ReturnType'
- command: oneOfToAnyOf
reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants
- reason: For better names
command: extractToRefs
args:
ref:
target: '$.components.schemas.ToolCallDelta.properties.tool_call'
name: '#/components/schemas/ToolCallOrString'
# `readme` is used to configure the code snippets that will be rendered in the
# README.md of various SDKs. In particular, you can change the `headline`
# snippet's endpoint and the arguments to call it with.
readme:
example_requests:
default:
type: request
endpoint: post /v1/chat/completions
params: &ref_0 {}
headline:
type: request
endpoint: post /v1/models
params: *ref_0
pagination:
type: request
endpoint: post /v1/chat/completions
params: {}


@@ -15,6 +15,141 @@ info:
servers:
  - url: http://any-hosted-llama-stack.com
paths:
/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
required: true
schema:
type: integer
deprecated: false
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: false
/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: false
/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: false
  /v1/chat/completions:
    get:
      responses:
@@ -4212,6 +4347,331 @@ components:
      title: Error
      description: >-
        Error response from the API. Roughly follows RFC 7807.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
    Order:
      type: string
      enum:
@@ -10258,6 +10718,10 @@ components:
           description: >-
             The content of the chunk, which can be interleaved text, images, or other
             types.
+        chunk_id:
+          type: string
+          description: >-
+            Unique identifier for the chunk. Must be provided explicitly.
         metadata:
           type: object
           additionalProperties:
@@ -10278,10 +10742,6 @@ components:
           description: >-
             Optional embedding for the chunk. If not provided, it will be computed
             later.
-        stored_chunk_id:
-          type: string
-          description: >-
-            The chunk ID that is stored in the vector database. Used for backend functionality.
         chunk_metadata:
           $ref: '#/components/schemas/ChunkMetadata'
           description: >-
@@ -10290,6 +10750,7 @@ components:
       additionalProperties: false
       required:
         - content
+        - chunk_id
         - metadata
       title: Chunk
       description: >-
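
Since `chunk_id` is now a required `Chunk` property (and the server-managed `stored_chunk_id` field is gone), callers must mint an ID before inserting. A hedged sketch using the `llama_stack_client` Python SDK; the base URL and `vector_db_id` are placeholders, and the chunk is a plain dict matching the schema above:

```python
import uuid

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

# chunk_id must now be provided explicitly per the updated Chunk schema.
chunk = {
    "chunk_id": str(uuid.uuid4()),  # uuid4: random and FIPS-friendly
    "content": "Llama Stack supports batch processing.",
    "metadata": {"source": "docs"},
}

client.vector_io.insert(vector_db_id="my-vector-db", chunks=[chunk])
```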
@@ -13527,6 +13988,19 @@ tags:
    description: >-
      APIs for creating and interacting with agentic systems.
    x-displayName: Agents
- name: Batches
description: >-
The API is designed to allow use of openai client libraries for seamless integration.
This API provides the following extensions:
- idempotent batch creation
Note: This API is currently under active development and may undergo changes.
x-displayName: >-
The Batches API enables efficient processing of multiple requests in a single
operation, particularly useful for processing large datasets, batch evaluation
workflows, and cost-effective inference at scale.
  - name: Benchmarks
    description: ''
  - name: Conversations
@@ -13601,6 +14075,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
      - Batches
      - Benchmarks
      - Conversations
      - DatasetIO
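
Since the Batches tag notes the API is designed for use with OpenAI client libraries, the endpoints above can be exercised with the stock `openai` package. A sketch, assuming a Llama Stack server at `http://localhost:8321/v1` and an already-uploaded input file:

```python
from openai import OpenAI

# Point the standard OpenAI client at a Llama Stack deployment (URL assumed).
client = OpenAI(base_url="http://localhost:8321/v1", api_key="not-needed")

# CreateBatchRequest: input_file_id, endpoint, and completion_window ("24h")
# are required; metadata is optional. The llama-stack-specific
# idempotency_key extension rides along in the request body.
batch = client.batches.create(
    input_file_id="file-abc123",  # placeholder ID from a prior file upload
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={"project": "demo"},
)

print(client.batches.retrieve(batch.id).status)  # e.g. "validating"

# List batches using the cursor parameters defined above.
page = client.batches.list(limit=20)
print([b.id for b in page.data])
```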

File diff suppressed because it is too large.


@@ -242,15 +242,6 @@ const sidebars: SidebarsConfig = {
         'providers/eval/remote_nvidia'
       ],
     },
-    {
-      type: 'category',
-      label: 'Telemetry',
-      collapsed: true,
-      items: [
-        'providers/telemetry/index',
-        'providers/telemetry/inline_meta-reference'
-      ],
-    },
     {
       type: 'category',
       label: 'Batches',


@@ -1414,6 +1414,193 @@
        "deprecated": true
      }
    },
"/v1/openai/v1/batches": {
"get": {
"responses": {
"200": {
"description": "A list of batch objects.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBatchesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "List all batches for the current user.",
"description": "List all batches for the current user.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "A cursor for pagination; returns batches after this batch ID.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "Number of batches to return (default 20, max 100).",
"required": true,
"schema": {
"type": "integer"
}
}
],
"deprecated": true
},
"post": {
"responses": {
"200": {
"description": "The created batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Create a new batch for processing multiple API requests.",
"description": "Create a new batch for processing multiple API requests.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateBatchRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/openai/v1/batches/{batch_id}": {
"get": {
"responses": {
"200": {
"description": "The batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Retrieve information about a specific batch.",
"description": "Retrieve information about a specific batch.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
},
"/v1/openai/v1/batches/{batch_id}/cancel": {
"post": {
"responses": {
"200": {
"description": "The updated batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Cancel a batch that is in progress.",
"description": "Cancel a batch that is in progress.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
},
"/v1/openai/v1/chat/completions": { "/v1/openai/v1/chat/completions": {
"get": { "get": {
"responses": { "responses": {
@@ -6401,6 +6588,451 @@
        "title": "Job",
        "description": "A job execution instance with status tracking."
      },
"ListBatchesResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
}
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"has_more"
],
"title": "ListBatchesResponse",
"description": "Response containing a list of batch objects."
},
"CreateBatchRequest": {
"type": "object",
"properties": {
"input_file_id": {
"type": "string",
"description": "The ID of an uploaded file containing requests for the batch."
},
"endpoint": {
"type": "string",
"description": "The endpoint to be used for all requests in the batch."
},
"completion_window": {
"type": "string",
"const": "24h",
"description": "The time window within which the batch should be processed."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional metadata for the batch."
},
"idempotency_key": {
"type": "string",
"description": "Optional idempotency key. When provided, enables idempotent behavior."
}
},
"additionalProperties": false,
"required": [
"input_file_id",
"endpoint",
"completion_window"
],
"title": "CreateBatchRequest"
},
"Batch": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
},
"Order": { "Order": {
"type": "string", "type": "string",
"enum": [ "enum": [
@@ -13505,6 +14137,11 @@
      "description": "APIs for creating and interacting with agentic systems.\n\n## Deprecated APIs\n\n> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.\n\n### Migration Guidance\n\nIf you are using deprecated versions of the Agents or Responses APIs, please migrate to:\n\n- **Responses API**: Use the stable v1 Responses API endpoints\n",
      "x-displayName": "Agents"
    },
{
"name": "Batches",
"description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
"x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
},
    {
      "name": "Benchmarks",
      "description": ""
@@ -13555,6 +14192,7 @@
      "name": "Operations",
      "tags": [
        "Agents",
        "Batches",
        "Benchmarks",
        "DatasetIO",
        "Datasets",


@@ -1012,6 +1012,141 @@ paths:
        schema:
          type: string
      deprecated: true
/v1/openai/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
required: true
schema:
type: integer
deprecated: true
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: true
/v1/openai/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: true
/v1/openai/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: true
  /v1/openai/v1/chat/completions:
    get:
      responses:
@@ -4736,6 +4871,331 @@ components:
      title: Job
      description: >-
        A job execution instance with status tracking.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
    Order:
      type: string
      enum:
@@ -10263,6 +10723,19 @@ tags:
      - **Responses API**: Use the stable v1 Responses API endpoints
    x-displayName: Agents
- name: Batches
description: >-
The API is designed to allow use of openai client libraries for seamless integration.
This API provides the following extensions:
- idempotent batch creation
Note: This API is currently under active development and may undergo changes.
x-displayName: >-
The Batches API enables efficient processing of multiple requests in a single
operation, particularly useful for processing large datasets, batch evaluation
workflows, and cost-effective inference at scale.
  - name: Benchmarks
    description: ''
  - name: DatasetIO
@@ -10308,6 +10781,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
      - Batches
      - Benchmarks
      - DatasetIO
      - Datasets


@@ -40,6 +40,193 @@
    }
  ],
  "paths": {
"/v1/batches": {
"get": {
"responses": {
"200": {
"description": "A list of batch objects.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBatchesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "List all batches for the current user.",
"description": "List all batches for the current user.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "A cursor for pagination; returns batches after this batch ID.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "Number of batches to return (default 20, max 100).",
"required": true,
"schema": {
"type": "integer"
}
}
],
"deprecated": false
},
"post": {
"responses": {
"200": {
"description": "The created batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Create a new batch for processing multiple API requests.",
"description": "Create a new batch for processing multiple API requests.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateBatchRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1/batches/{batch_id}": {
"get": {
"responses": {
"200": {
"description": "The batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Retrieve information about a specific batch.",
"description": "Retrieve information about a specific batch.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/batches/{batch_id}/cancel": {
"post": {
"responses": {
"200": {
"description": "The updated batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Cancel a batch that is in progress.",
"description": "Cancel a batch that is in progress.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/chat/completions": { "/v1/chat/completions": {
"get": { "get": {
"responses": { "responses": {
@@ -4005,6 +4192,451 @@
        "title": "Error",
        "description": "Error response from the API. Roughly follows RFC 7807."
      },
"ListBatchesResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
}
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"has_more"
],
"title": "ListBatchesResponse",
"description": "Response containing a list of batch objects."
},
"CreateBatchRequest": {
"type": "object",
"properties": {
"input_file_id": {
"type": "string",
"description": "The ID of an uploaded file containing requests for the batch."
},
"endpoint": {
"type": "string",
"description": "The endpoint to be used for all requests in the batch."
},
"completion_window": {
"type": "string",
"const": "24h",
"description": "The time window within which the batch should be processed."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional metadata for the batch."
},
"idempotency_key": {
"type": "string",
"description": "Optional idempotency key. When provided, enables idempotent behavior."
}
},
"additionalProperties": false,
"required": [
"input_file_id",
"endpoint",
"completion_window"
],
"title": "CreateBatchRequest"
},
"Batch": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
},
"Order": { "Order": {
"type": "string", "type": "string",
"enum": [ "enum": [
@ -11897,6 +12529,10 @@
"$ref": "#/components/schemas/InterleavedContent", "$ref": "#/components/schemas/InterleavedContent",
"description": "The content of the chunk, which can be interleaved text, images, or other types." "description": "The content of the chunk, which can be interleaved text, images, or other types."
}, },
"chunk_id": {
"type": "string",
"description": "Unique identifier for the chunk. Must be provided explicitly."
},
"metadata": { "metadata": {
"type": "object", "type": "object",
"additionalProperties": { "additionalProperties": {
@ -11930,10 +12566,6 @@
}, },
"description": "Optional embedding for the chunk. If not provided, it will be computed later." "description": "Optional embedding for the chunk. If not provided, it will be computed later."
}, },
"stored_chunk_id": {
"type": "string",
"description": "The chunk ID that is stored in the vector database. Used for backend functionality."
},
"chunk_metadata": { "chunk_metadata": {
"$ref": "#/components/schemas/ChunkMetadata", "$ref": "#/components/schemas/ChunkMetadata",
"description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."
@ -11942,6 +12574,7 @@
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"content", "content",
"chunk_id",
"metadata" "metadata"
], ],
"title": "Chunk", "title": "Chunk",
@ -13288,6 +13921,11 @@
"description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n - Supports dynamic `vector_store_ids` per call\n - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`", "description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n - Supports dynamic `vector_store_ids` per call\n - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`",
"x-displayName": "Agents" "x-displayName": "Agents"
}, },
    {
      "name": "Batches",
      "description": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale.\n\nThe API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
      "x-displayName": "Batches"
    },
    {
      "name": "Conversations",
      "description": "Protocol for conversation management operations.",
@ -13361,6 +13999,7 @@
      "name": "Operations",
      "tags": [
        "Agents",
        "Batches",
        "Conversations",
        "Files",
        "Inference",
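The Batches tag above advertises OpenAI client compatibility, so a minimal sketch of driving these endpoints with the openai Python client may help. The base_url, api_key, and requests.jsonl file are assumptions; idempotency_key is the Llama Stack extension described in CreateBatchRequest and is passed through extra_body because the stock client does not model it.

# Sketch only: assumed local server URL and input file.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# Upload a JSONL file of requests, then POST /v1/batches.
batch_file = client.files.create(file=open("requests.jsonl", "rb"), purpose="batch")
batch = client.batches.create(
    input_file_id=batch_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",  # the only value the schema allows (const: 24h)
    metadata={"job": "nightly-eval"},
    extra_body={"idempotency_key": "nightly-eval-2025-10-30"},  # Llama Stack extension
)

# GET /v1/batches/{batch_id} to poll status; GET /v1/batches to paginate.
batch = client.batches.retrieve(batch.id)
page = client.batches.list(limit=20)  # cursor pagination via after/limit

# POST /v1/batches/{batch_id}/cancel while the batch is still in progress.
if batch.status in ("validating", "in_progress"):
    client.batches.cancel(batch.id)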
@ -12,6 +12,141 @@ info:
servers:
  - url: http://any-hosted-llama-stack.com
paths:
/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
        required: false
schema:
type: integer
deprecated: false
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: false
/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: false
/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: false
  /v1/chat/completions:
    get:
      responses:
@ -2999,6 +3134,331 @@ components:
      title: Error
      description: >-
        Error response from the API. Roughly follows RFC 7807.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
    Order:
      type: string
      enum:
@ -9045,6 +9505,10 @@ components:
          description: >-
            The content of the chunk, which can be interleaved text, images, or other
            types.
chunk_id:
type: string
description: >-
Unique identifier for the chunk. Must be provided explicitly.
        metadata:
          type: object
          additionalProperties:
@ -9065,10 +9529,6 @@ components:
          description: >-
            Optional embedding for the chunk. If not provided, it will be computed
            later.
stored_chunk_id:
type: string
description: >-
The chunk ID that is stored in the vector database. Used for backend functionality.
        chunk_metadata:
          $ref: '#/components/schemas/ChunkMetadata'
          description: >-
@ -9077,6 +9537,7 @@ components:
      additionalProperties: false
      required:
        - content
        - chunk_id
        - metadata
      title: Chunk
      description: >-
@ -10143,6 +10604,19 @@ tags:
      - `background`
    x-displayName: Agents
  - name: Batches
    description: >-
      The Batches API enables efficient processing of multiple requests in a single
      operation, particularly useful for processing large datasets, batch evaluation
      workflows, and cost-effective inference at scale.

      The API is designed to allow use of openai client libraries for seamless integration.

      This API provides the following extensions:
       - idempotent batch creation

      Note: This API is currently under active development and may undergo changes.
    x-displayName: Batches
  - name: Conversations
    description: >-
      Protocol for conversation management operations.
@ -10205,6 +10679,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
      - Batches
      - Conversations
      - Files
      - Inference
@ -40,6 +40,193 @@
    }
  ],
  "paths": {
"/v1/batches": {
"get": {
"responses": {
"200": {
"description": "A list of batch objects.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBatchesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "List all batches for the current user.",
"description": "List all batches for the current user.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "A cursor for pagination; returns batches after this batch ID.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "Number of batches to return (default 20, max 100).",
"required": true,
"schema": {
"type": "integer"
}
}
],
"deprecated": false
},
"post": {
"responses": {
"200": {
"description": "The created batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Create a new batch for processing multiple API requests.",
"description": "Create a new batch for processing multiple API requests.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateBatchRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1/batches/{batch_id}": {
"get": {
"responses": {
"200": {
"description": "The batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Retrieve information about a specific batch.",
"description": "Retrieve information about a specific batch.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/batches/{batch_id}/cancel": {
"post": {
"responses": {
"200": {
"description": "The updated batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Cancel a batch that is in progress.",
"description": "Cancel a batch that is in progress.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/chat/completions": { "/v1/chat/completions": {
"get": { "get": {
"responses": { "responses": {
@ -5677,6 +5864,451 @@
"title": "Error", "title": "Error",
"description": "Error response from the API. Roughly follows RFC 7807." "description": "Error response from the API. Roughly follows RFC 7807."
}, },
"ListBatchesResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
}
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"has_more"
],
"title": "ListBatchesResponse",
"description": "Response containing a list of batch objects."
},
"CreateBatchRequest": {
"type": "object",
"properties": {
"input_file_id": {
"type": "string",
"description": "The ID of an uploaded file containing requests for the batch."
},
"endpoint": {
"type": "string",
"description": "The endpoint to be used for all requests in the batch."
},
"completion_window": {
"type": "string",
"const": "24h",
"description": "The time window within which the batch should be processed."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional metadata for the batch."
},
"idempotency_key": {
"type": "string",
"description": "Optional idempotency key. When provided, enables idempotent behavior."
}
},
"additionalProperties": false,
"required": [
"input_file_id",
"endpoint",
"completion_window"
],
"title": "CreateBatchRequest"
},
"Batch": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
},
"Order": { "Order": {
"type": "string", "type": "string",
"enum": [ "enum": [
@ -13569,6 +14201,10 @@
"$ref": "#/components/schemas/InterleavedContent", "$ref": "#/components/schemas/InterleavedContent",
"description": "The content of the chunk, which can be interleaved text, images, or other types." "description": "The content of the chunk, which can be interleaved text, images, or other types."
}, },
"chunk_id": {
"type": "string",
"description": "Unique identifier for the chunk. Must be provided explicitly."
},
"metadata": { "metadata": {
"type": "object", "type": "object",
"additionalProperties": { "additionalProperties": {
@ -13602,10 +14238,6 @@
}, },
"description": "Optional embedding for the chunk. If not provided, it will be computed later." "description": "Optional embedding for the chunk. If not provided, it will be computed later."
}, },
"stored_chunk_id": {
"type": "string",
"description": "The chunk ID that is stored in the vector database. Used for backend functionality."
},
"chunk_metadata": { "chunk_metadata": {
"$ref": "#/components/schemas/ChunkMetadata", "$ref": "#/components/schemas/ChunkMetadata",
"description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."
@ -13614,6 +14246,7 @@
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"content", "content",
"chunk_id",
"metadata" "metadata"
], ],
"title": "Chunk", "title": "Chunk",
@ -17960,6 +18593,11 @@
"description": "APIs for creating and interacting with agentic systems.", "description": "APIs for creating and interacting with agentic systems.",
"x-displayName": "Agents" "x-displayName": "Agents"
}, },
    {
      "name": "Batches",
      "description": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale.\n\nThe API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
      "x-displayName": "Batches"
    },
    {
      "name": "Benchmarks",
      "description": ""
@ -18054,6 +18692,7 @@
      "name": "Operations",
      "tags": [
        "Agents",
        "Batches",
        "Benchmarks",
        "Conversations",
        "DatasetIO",
@ -15,6 +15,141 @@ info:
servers:
  - url: http://any-hosted-llama-stack.com
paths:
/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
        required: false
schema:
type: integer
deprecated: false
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: false
/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: false
/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: false
  /v1/chat/completions:
    get:
      responses:
@ -4212,6 +4347,331 @@ components:
      title: Error
      description: >-
        Error response from the API. Roughly follows RFC 7807.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
    Order:
      type: string
      enum:
@ -10258,6 +10718,10 @@ components:
          description: >-
            The content of the chunk, which can be interleaved text, images, or other
            types.
chunk_id:
type: string
description: >-
Unique identifier for the chunk. Must be provided explicitly.
        metadata:
          type: object
          additionalProperties:
@ -10278,10 +10742,6 @@ components:
          description: >-
            Optional embedding for the chunk. If not provided, it will be computed
            later.
stored_chunk_id:
type: string
description: >-
The chunk ID that is stored in the vector database. Used for backend functionality.
        chunk_metadata:
          $ref: '#/components/schemas/ChunkMetadata'
          description: >-
@ -10290,6 +10750,7 @@ components:
      additionalProperties: false
      required:
        - content
        - chunk_id
        - metadata
      title: Chunk
      description: >-
@ -13527,6 +13988,19 @@ tags:
    description: >-
      APIs for creating and interacting with agentic systems.
    x-displayName: Agents
  - name: Batches
    description: >-
      The Batches API enables efficient processing of multiple requests in a single
      operation, particularly useful for processing large datasets, batch evaluation
      workflows, and cost-effective inference at scale.

      The API is designed to allow use of openai client libraries for seamless integration.

      This API provides the following extensions:
       - idempotent batch creation

      Note: This API is currently under active development and may undergo changes.
    x-displayName: Batches
  - name: Benchmarks
    description: ''
  - name: Conversations
@ -13601,6 +14075,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
      - Batches
      - Benchmarks
      - Conversations
      - DatasetIO
@ -285,7 +285,6 @@ exclude = [
"^src/llama_stack/models/llama/llama3/interface\\.py$", "^src/llama_stack/models/llama/llama3/interface\\.py$",
"^src/llama_stack/models/llama/llama3/tokenizer\\.py$", "^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
"^src/llama_stack/models/llama/llama3/tool_utils\\.py$", "^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
"^src/llama_stack/providers/inline/agents/meta_reference/",
"^src/llama_stack/providers/inline/datasetio/localfs/", "^src/llama_stack/providers/inline/datasetio/localfs/",
"^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$", "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
"^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$", "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
@ -313,8 +313,20 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
fi
echo "Using image: $IMAGE_NAME"
docker run -d --network host --name "$container_name" \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
# On macOS/Darwin, --network host doesn't work as expected due to Docker running in a VM
# Use regular port mapping instead
NETWORK_MODE=""
PORT_MAPPINGS=""
if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
NETWORK_MODE="--network host"
else
# On non-Linux (macOS, Windows), need explicit port mappings for both app and telemetry
PORT_MAPPINGS="-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT -p $COLLECTOR_PORT:$COLLECTOR_PORT"
echo "Using bridge networking with port mapping (non-Linux)"
fi
docker run -d $NETWORK_MODE --name "$container_name" \
$PORT_MAPPINGS \
  $DOCKER_ENV_VARS \
  "$IMAGE_NAME" \
  --port $LLAMA_STACK_PORT
@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from collections.abc import Sequence
from typing import Annotated, Any, Literal

from pydantic import BaseModel, Field, model_validator
@ -202,7 +203,7 @@ class OpenAIResponseMessage(BaseModel):
    scenarios.
    """

    content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]
    content: str | Sequence[OpenAIResponseInputMessageContent] | Sequence[OpenAIResponseOutputMessageContent]
    role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
    type: Literal["message"] = "message"
@ -254,10 +255,10 @@ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
""" """
id: str id: str
queries: list[str] queries: Sequence[str]
status: str status: str
type: Literal["file_search_call"] = "file_search_call" type: Literal["file_search_call"] = "file_search_call"
results: list[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None results: Sequence[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
@json_schema_type @json_schema_type
@ -597,7 +598,7 @@ class OpenAIResponseObject(BaseModel):
    id: str
    model: str
    object: Literal["response"] = "response"
    output: list[OpenAIResponseOutput]
    output: Sequence[OpenAIResponseOutput]
    parallel_tool_calls: bool = False
    previous_response_id: str | None = None
    prompt: OpenAIResponsePrompt | None = None
@ -607,7 +608,7 @@ class OpenAIResponseObject(BaseModel):
    # before the field was added. New responses will have this set always.
    text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
    top_p: float | None = None
    tools: list[OpenAIResponseTool] | None = None
    tools: Sequence[OpenAIResponseTool] | None = None
    truncation: str | None = None
    usage: OpenAIResponseUsage | None = None
    instructions: str | None = None
@ -1315,7 +1316,7 @@ class ListOpenAIResponseInputItem(BaseModel):
    :param object: Object type identifier, always "list"
    """

    data: list[OpenAIResponseInput]
    data: Sequence[OpenAIResponseInput]
    object: Literal["list"] = "list"
@ -1326,7 +1327,7 @@ class OpenAIResponseObjectWithInput(OpenAIResponseObject):
    :param input: List of input items that led to this response
    """

    input: list[OpenAIResponseInput]
    input: Sequence[OpenAIResponseInput]

    def to_response_object(self) -> OpenAIResponseObject:
        """Convert to OpenAIResponseObject by excluding input field."""
@ -1344,7 +1345,7 @@ class ListOpenAIResponseObject(BaseModel):
    :param object: Object type identifier, always "list"
    """

    data: list[OpenAIResponseObjectWithInput]
    data: Sequence[OpenAIResponseObjectWithInput]
    has_more: bool
    first_id: str
    last_id: str
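A minimal sketch of why these annotations move from list to Sequence: under mypy, list is invariant while Sequence is covariant, so callers may pass collections of subtypes (or read-only views) without a type error. Base, Sub, and the function names below are illustrative, not from the source.

from collections.abc import Sequence


class Base: ...


class Sub(Base): ...


def takes_list(items: list[Base]) -> None: ...


def takes_seq(items: Sequence[Base]) -> None: ...


subs: list[Sub] = [Sub()]
takes_seq(subs)   # OK: Sequence is covariant, so Sequence[Base] accepts list[Sub]
takes_list(subs)  # mypy error: list is invariant, so list[Base] rejects list[Sub]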
@ -8,7 +8,6 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import uuid
from typing import Annotated, Any, Literal, Protocol, runtime_checkable

from fastapi import Body
@ -18,7 +17,6 @@ from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.core.telemetry.trace_protocol import trace_protocol
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
from llama_stack.schema_utils import json_schema_type, webmethod
from llama_stack.strong_typing.schema import register_schema
@ -61,38 +59,19 @@ class Chunk(BaseModel):
""" """
A chunk of content that can be inserted into a vector database. A chunk of content that can be inserted into a vector database.
:param content: The content of the chunk, which can be interleaved text, images, or other types. :param content: The content of the chunk, which can be interleaved text, images, or other types.
:param embedding: Optional embedding for the chunk. If not provided, it will be computed later. :param chunk_id: Unique identifier for the chunk. Must be provided explicitly.
:param metadata: Metadata associated with the chunk that will be used in the model context during inference. :param metadata: Metadata associated with the chunk that will be used in the model context during inference.
:param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality. :param embedding: Optional embedding for the chunk. If not provided, it will be computed later.
:param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference. :param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference.
The `chunk_metadata` is required backend functionality. The `chunk_metadata` is required backend functionality.
""" """
content: InterleavedContent content: InterleavedContent
chunk_id: str
metadata: dict[str, Any] = Field(default_factory=dict) metadata: dict[str, Any] = Field(default_factory=dict)
embedding: list[float] | None = None embedding: list[float] | None = None
# The alias parameter serializes the field as "chunk_id" in JSON but keeps the internal name as "stored_chunk_id"
stored_chunk_id: str | None = Field(default=None, alias="chunk_id")
chunk_metadata: ChunkMetadata | None = None chunk_metadata: ChunkMetadata | None = None
model_config = {"populate_by_name": True}
def model_post_init(self, __context):
# Extract chunk_id from metadata if present
if self.metadata and "chunk_id" in self.metadata:
self.stored_chunk_id = self.metadata.pop("chunk_id")
@property
def chunk_id(self) -> str:
"""Returns the chunk ID, which is either an input `chunk_id` or a generated one if not set."""
if self.stored_chunk_id:
return self.stored_chunk_id
if "document_id" in self.metadata:
return generate_chunk_id(self.metadata["document_id"], str(self.content))
return generate_chunk_id(str(uuid.uuid4()), str(self.content))
    @property
    def document_id(self) -> str | None:
        """Returns the document_id from either metadata or chunk_metadata, with metadata taking precedence."""
@ -13,6 +13,8 @@ from llama_stack.core.datatypes import (
    ModelWithOwner,
    RegistryEntrySource,
)
from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger

from .common import CommonRoutingTableImpl, lookup_model
@ -42,11 +44,90 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
        await self.update_registered_models(provider_id, models)
async def _get_dynamic_models_from_provider_data(self) -> list[Model]:
"""
Fetch models from providers that have credentials in the current request's provider_data.
This allows users to see models available to them from providers that require
per-request API keys (via X-LlamaStack-Provider-Data header).
Returns models with fully qualified identifiers (provider_id/model_id) but does NOT
cache them in the registry since they are user-specific.
"""
provider_data = PROVIDER_DATA_VAR.get()
if not provider_data:
return []
dynamic_models = []
for provider_id, provider in self.impls_by_provider_id.items():
# Check if this provider supports provider_data
if not isinstance(provider, NeedsRequestProviderData):
continue
# Check if provider has a validator (some providers like ollama don't need per-request credentials)
spec = getattr(provider, "__provider_spec__", None)
if not spec or not getattr(spec, "provider_data_validator", None):
continue
# Validate provider_data silently - we're speculatively checking all providers
# so validation failures are expected when user didn't provide keys for this provider
try:
validator = instantiate_class_type(spec.provider_data_validator)
validator(**provider_data)
except Exception:
# User didn't provide credentials for this provider - skip silently
continue
# Validation succeeded! User has credentials for this provider
# Now try to list models
try:
models = await provider.list_models()
if not models:
continue
# Ensure models have fully qualified identifiers with provider_id prefix
for model in models:
# Only add prefix if model identifier doesn't already have it
if not model.identifier.startswith(f"{provider_id}/"):
model.identifier = f"{provider_id}/{model.provider_resource_id}"
dynamic_models.append(model)
logger.debug(f"Fetched {len(models)} models from provider {provider_id} using provider_data")
except Exception as e:
logger.debug(f"Failed to list models from provider {provider_id} with provider_data: {e}")
continue
return dynamic_models
    async def list_models(self) -> ListModelsResponse:
        return ListModelsResponse(data=await self.get_all_with_type("model"))
        # Get models from registry
        registry_models = await self.get_all_with_type("model")
registry_models = await self.get_all_with_type("model")
# Get additional models available via provider_data (user-specific, not cached)
dynamic_models = await self._get_dynamic_models_from_provider_data()
# Combine, avoiding duplicates (registry takes precedence)
registry_identifiers = {m.identifier for m in registry_models}
unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers]
return ListModelsResponse(data=registry_models + unique_dynamic_models)
    async def openai_list_models(self) -> OpenAIListModelsResponse:
        models = await self.get_all_with_type("model")
        # Get models from registry
        registry_models = await self.get_all_with_type("model")
registry_models = await self.get_all_with_type("model")
# Get additional models available via provider_data (user-specific, not cached)
dynamic_models = await self._get_dynamic_models_from_provider_data()
# Combine, avoiding duplicates (registry takes precedence)
registry_identifiers = {m.identifier for m in registry_models}
unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers]
all_models = registry_models + unique_dynamic_models
        openai_models = [
            OpenAIModel(
                id=model.identifier,
@ -54,7 +135,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
                created=int(time.time()),
                owned_by="llama_stack",
            )
            for model in models
            for model in all_models
        ]
        return OpenAIListModelsResponse(data=openai_models)
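A sketch of how this new path is exercised: per-request credentials arrive in the X-LlamaStack-Provider-Data header, validation selects the matching providers, and their models are merged into the list response. The URL and the together_api_key payload key are illustrative assumptions.

import json

import httpx

resp = httpx.get(
    "http://localhost:8321/v1/models",
    headers={"X-LlamaStack-Provider-Data": json.dumps({"together_api_key": "sk-..."})},
)
for model in resp.json()["data"]:
    print(model["id"])  # registry models plus the caller's dynamic models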
@ -14,6 +14,7 @@ from typing import Any
import yaml

from llama_stack.apis.agents import Agents
from llama_stack.apis.batches import Batches
from llama_stack.apis.benchmarks import Benchmarks from llama_stack.apis.benchmarks import Benchmarks
from llama_stack.apis.conversations import Conversations from llama_stack.apis.conversations import Conversations
from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasetio import DatasetIO
@ -63,6 +64,7 @@ class LlamaStack(
Providers, Providers,
Inference, Inference,
Agents, Agents,
Batches,
Safety, Safety,
SyntheticDataGeneration, SyntheticDataGeneration,
Datasets, Datasets,
@@ -11,6 +11,7 @@ import uuid
 import warnings
 from collections.abc import AsyncGenerator
 from datetime import UTC, datetime
+from typing import Any, cast
 import httpx
@@ -125,12 +126,12 @@ class ChatAgent(ShieldRunnerMixin):
         )
     def turn_to_messages(self, turn: Turn) -> list[Message]:
-        messages = []
+        messages: list[Message] = []
         # NOTE: if a toolcall response is in a step, we do not add it when processing the input messages
         tool_call_ids = set()
         for step in turn.steps:
-            if step.step_type == StepType.tool_execution.value:
+            if step.step_type == StepType.tool_execution.value and isinstance(step, ToolExecutionStep):
                 for response in step.tool_responses:
                     tool_call_ids.add(response.call_id)
@@ -149,9 +150,9 @@ class ChatAgent(ShieldRunnerMixin):
             messages.append(msg)
         for step in turn.steps:
-            if step.step_type == StepType.inference.value:
+            if step.step_type == StepType.inference.value and isinstance(step, InferenceStep):
                 messages.append(step.model_response)
-            elif step.step_type == StepType.tool_execution.value:
+            elif step.step_type == StepType.tool_execution.value and isinstance(step, ToolExecutionStep):
                 for response in step.tool_responses:
                     messages.append(
                         ToolResponseMessage(
@@ -159,8 +160,8 @@ class ChatAgent(ShieldRunnerMixin):
                             content=response.content,
                         )
                     )
-            elif step.step_type == StepType.shield_call.value:
-                if step.violation:
+            elif step.step_type == StepType.shield_call.value and isinstance(step, ShieldCallStep):
+                if step.violation and step.violation.user_message:
                     # CompletionMessage itself in the ShieldResponse
                     messages.append(
                         CompletionMessage(
@@ -174,7 +175,7 @@ class ChatAgent(ShieldRunnerMixin):
         return await self.storage.create_session(name)
     async def get_messages_from_turns(self, turns: list[Turn]) -> list[Message]:
-        messages = []
+        messages: list[Message] = []
         if self.agent_config.instructions != "":
             messages.append(SystemMessage(content=self.agent_config.instructions))
@@ -231,7 +232,9 @@ class ChatAgent(ShieldRunnerMixin):
         steps = []
         messages = await self.get_messages_from_turns(turns)
         if is_resume:
+            assert isinstance(request, AgentTurnResumeRequest)
             tool_response_messages = [
                 ToolResponseMessage(call_id=x.call_id, content=x.content) for x in request.tool_responses
             ]
@@ -252,42 +255,52 @@ class ChatAgent(ShieldRunnerMixin):
             in_progress_tool_call_step = await self.storage.get_in_progress_tool_call_step(
                 request.session_id, request.turn_id
             )
-            now = datetime.now(UTC).isoformat()
+            now_dt = datetime.now(UTC)
             tool_execution_step = ToolExecutionStep(
                 step_id=(in_progress_tool_call_step.step_id if in_progress_tool_call_step else str(uuid.uuid4())),
                 turn_id=request.turn_id,
                 tool_calls=(in_progress_tool_call_step.tool_calls if in_progress_tool_call_step else []),
                 tool_responses=request.tool_responses,
-                completed_at=now,
-                started_at=(in_progress_tool_call_step.started_at if in_progress_tool_call_step else now),
+                completed_at=now_dt,
+                started_at=(in_progress_tool_call_step.started_at if in_progress_tool_call_step else now_dt),
             )
             steps.append(tool_execution_step)
             yield AgentTurnResponseStreamChunk(
                 event=AgentTurnResponseEvent(
                     payload=AgentTurnResponseStepCompletePayload(
-                        step_type=StepType.tool_execution.value,
+                        step_type=StepType.tool_execution,
                         step_id=tool_execution_step.step_id,
                         step_details=tool_execution_step,
                     )
                 )
             )
-            input_messages = last_turn.input_messages
+            # Cast needed due to list invariance - last_turn.input_messages is the right type
+            input_messages = last_turn.input_messages  # type: ignore[assignment]
-            turn_id = request.turn_id
+            actual_turn_id = request.turn_id
             start_time = last_turn.started_at
         else:
+            assert isinstance(request, AgentTurnCreateRequest)
             messages.extend(request.messages)
-            start_time = datetime.now(UTC).isoformat()
+            start_time = datetime.now(UTC)
-            input_messages = request.messages
+            # Cast needed due to list invariance - request.messages is the right type
+            input_messages = request.messages  # type: ignore[assignment]
+            # Use the generated turn_id from beginning of function
+            actual_turn_id = turn_id if turn_id else str(uuid.uuid4())
         output_message = None
+        req_documents = request.documents if isinstance(request, AgentTurnCreateRequest) and not is_resume else None
+        req_sampling = (
+            self.agent_config.sampling_params if self.agent_config.sampling_params is not None else SamplingParams()
+        )
         async for chunk in self.run(
             session_id=request.session_id,
-            turn_id=turn_id,
+            turn_id=actual_turn_id,
             input_messages=messages,
-            sampling_params=self.agent_config.sampling_params,
+            sampling_params=req_sampling,
             stream=request.stream,
-            documents=request.documents if not is_resume else None,
+            documents=req_documents,
         ):
             if isinstance(chunk, CompletionMessage):
                 output_message = chunk
@@ -295,20 +308,23 @@ class ChatAgent(ShieldRunnerMixin):
             assert isinstance(chunk, AgentTurnResponseStreamChunk), f"Unexpected type {type(chunk)}"
             event = chunk.event
-            if event.payload.event_type == AgentTurnResponseEventType.step_complete.value:
-                steps.append(event.payload.step_details)
+            if event.payload.event_type == AgentTurnResponseEventType.step_complete.value and hasattr(
+                event.payload, "step_details"
+            ):
+                step_details = event.payload.step_details
+                steps.append(step_details)
             yield chunk
         assert output_message is not None
         turn = Turn(
-            turn_id=turn_id,
+            turn_id=actual_turn_id,
             session_id=request.session_id,
-            input_messages=input_messages,
+            input_messages=input_messages,  # type: ignore[arg-type]
             output_message=output_message,
             started_at=start_time,
-            completed_at=datetime.now(UTC).isoformat(),
+            completed_at=datetime.now(UTC),
             steps=steps,
         )
         await self.storage.add_turn_to_session(request.session_id, turn)
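Note: a recurring change in this file is storing datetime.now(UTC) objects instead of pre-rendered .isoformat() strings; Pydantic renders ISO 8601 at serialization time, so keeping the object preserves comparability until the boundary. A minimal sketch of why that is safe; the Step model below is an illustrative stand-in:

    # Minimal sketch: datetime fields serialize to ISO 8601 on dump, so there is
    # no need to call .isoformat() at construction time. `Step` is illustrative.
    from datetime import UTC, datetime
    from pydantic import BaseModel

    class Step(BaseModel):
        started_at: datetime
        completed_at: datetime

    step = Step(started_at=datetime.now(UTC), completed_at=datetime.now(UTC))
    assert step.completed_at >= step.started_at  # datetimes stay comparable
    print(step.model_dump_json())                # timestamps rendered as ISO 8601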
@@ -345,9 +361,9 @@ class ChatAgent(ShieldRunnerMixin):
         # return a "final value" for the `yield from` statement. we simulate that by yielding a
         # final boolean (to see whether an exception happened) and then explicitly testing for it.
-        if len(self.input_shields) > 0:
+        if self.input_shields:
             async for res in self.run_multiple_shields_wrapper(
-                turn_id, input_messages, self.input_shields, "user-input"
+                turn_id, cast(list[OpenAIMessageParam], input_messages), self.input_shields, "user-input"
             ):
                 if isinstance(res, bool):
                     return
@@ -374,9 +390,9 @@ class ChatAgent(ShieldRunnerMixin):
         # for output shields run on the full input and output combination
         messages = input_messages + [final_response]
-        if len(self.output_shields) > 0:
+        if self.output_shields:
             async for res in self.run_multiple_shields_wrapper(
-                turn_id, messages, self.output_shields, "assistant-output"
+                turn_id, cast(list[OpenAIMessageParam], messages), self.output_shields, "assistant-output"
             ):
                 if isinstance(res, bool):
                     return
@@ -388,7 +404,7 @@ class ChatAgent(ShieldRunnerMixin):
     async def run_multiple_shields_wrapper(
         self,
         turn_id: str,
-        messages: list[Message],
+        messages: list[OpenAIMessageParam],
         shields: list[str],
         touchpoint: str,
     ) -> AsyncGenerator:
@@ -402,12 +418,12 @@ class ChatAgent(ShieldRunnerMixin):
             return
         step_id = str(uuid.uuid4())
-        shield_call_start_time = datetime.now(UTC).isoformat()
+        shield_call_start_time = datetime.now(UTC)
         try:
             yield AgentTurnResponseStreamChunk(
                 event=AgentTurnResponseEvent(
                     payload=AgentTurnResponseStepStartPayload(
-                        step_type=StepType.shield_call.value,
+                        step_type=StepType.shield_call,
                         step_id=step_id,
                         metadata=dict(touchpoint=touchpoint),
                     )
@@ -419,14 +435,14 @@ class ChatAgent(ShieldRunnerMixin):
             yield AgentTurnResponseStreamChunk(
                 event=AgentTurnResponseEvent(
                     payload=AgentTurnResponseStepCompletePayload(
-                        step_type=StepType.shield_call.value,
+                        step_type=StepType.shield_call,
                         step_id=step_id,
                         step_details=ShieldCallStep(
                             step_id=step_id,
                             turn_id=turn_id,
                             violation=e.violation,
                             started_at=shield_call_start_time,
-                            completed_at=datetime.now(UTC).isoformat(),
+                            completed_at=datetime.now(UTC),
                         ),
                     )
                 )
@@ -443,14 +459,14 @@ class ChatAgent(ShieldRunnerMixin):
             yield AgentTurnResponseStreamChunk(
                 event=AgentTurnResponseEvent(
                     payload=AgentTurnResponseStepCompletePayload(
-                        step_type=StepType.shield_call.value,
+                        step_type=StepType.shield_call,
                         step_id=step_id,
                         step_details=ShieldCallStep(
                             step_id=step_id,
                             turn_id=turn_id,
                             violation=None,
                             started_at=shield_call_start_time,
-                            completed_at=datetime.now(UTC).isoformat(),
+                            completed_at=datetime.now(UTC),
                         ),
                     )
                 )
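Note: the StepType.shield_call.value -> StepType.shield_call changes rely on Pydantic coercing raw values into enum-typed fields, so passing the member directly type-checks while remaining wire-compatible. A sketch under the assumption that the payload field is enum-typed:

    # Sketch assuming the payload's step_type field is typed with the enum itself.
    from enum import Enum
    from pydantic import BaseModel

    class StepType(Enum):
        shield_call = "shield_call"

    class StepStartPayload(BaseModel):
        step_type: StepType

    # Both construct the same payload; the member is the type-checker-friendly spelling.
    a = StepStartPayload(step_type=StepType.shield_call)
    b = StepStartPayload(step_type="shield_call")  # Pydantic coerces the raw value
    assert a == b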
@@ -496,21 +512,22 @@ class ChatAgent(ShieldRunnerMixin):
             else:
                 self.tool_name_to_args[tool_name]["vector_store_ids"].append(session_info.vector_store_id)
-        output_attachments = []
+        output_attachments: list[Attachment] = []
         n_iter = await self.storage.get_num_infer_iters_in_turn(session_id, turn_id) or 0
         # Build a map of custom tools to their definitions for faster lookup
         client_tools = {}
-        for tool in self.agent_config.client_tools:
-            client_tools[tool.name] = tool
+        if self.agent_config.client_tools:
+            for tool in self.agent_config.client_tools:
+                client_tools[tool.name] = tool
         while True:
             step_id = str(uuid.uuid4())
-            inference_start_time = datetime.now(UTC).isoformat()
+            inference_start_time = datetime.now(UTC)
             yield AgentTurnResponseStreamChunk(
                 event=AgentTurnResponseEvent(
                     payload=AgentTurnResponseStepStartPayload(
-                        step_type=StepType.inference.value,
+                        step_type=StepType.inference,
                         step_id=step_id,
                     )
                 )
@@ -538,7 +555,7 @@ class ChatAgent(ShieldRunnerMixin):
                 else:
                     return value
-            def _add_type(openai_msg: dict) -> OpenAIMessageParam:
+            def _add_type(openai_msg: Any) -> OpenAIMessageParam:
                 # Serialize any nested Pydantic models to plain dicts
                 openai_msg = _serialize_nested(openai_msg)
@@ -588,7 +605,7 @@ class ChatAgent(ShieldRunnerMixin):
                 messages=openai_messages,
                 tools=openai_tools if openai_tools else None,
                 tool_choice=tool_choice,
-                response_format=self.agent_config.response_format,
+                response_format=self.agent_config.response_format,  # type: ignore[arg-type]
                 temperature=temperature,
                 top_p=top_p,
                 max_tokens=max_tokens,
@@ -598,7 +615,8 @@ class ChatAgent(ShieldRunnerMixin):
             # Convert OpenAI stream back to Llama Stack format
             response_stream = convert_openai_chat_completion_stream(
-                openai_stream, enable_incremental_tool_calls=True
+                openai_stream,  # type: ignore[arg-type]
+                enable_incremental_tool_calls=True,
             )
             async for chunk in response_stream:
@@ -620,7 +638,7 @@ class ChatAgent(ShieldRunnerMixin):
                     yield AgentTurnResponseStreamChunk(
                         event=AgentTurnResponseEvent(
                             payload=AgentTurnResponseStepProgressPayload(
-                                step_type=StepType.inference.value,
+                                step_type=StepType.inference,
                                 step_id=step_id,
                                 delta=delta,
                             )
@@ -633,7 +651,7 @@ class ChatAgent(ShieldRunnerMixin):
                     yield AgentTurnResponseStreamChunk(
                         event=AgentTurnResponseEvent(
                             payload=AgentTurnResponseStepProgressPayload(
-                                step_type=StepType.inference.value,
+                                step_type=StepType.inference,
                                 step_id=step_id,
                                 delta=delta,
                             )
@@ -651,7 +669,9 @@ class ChatAgent(ShieldRunnerMixin):
                 output_attr = json.dumps(
                     {
                         "content": content,
-                        "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls],
+                        "tool_calls": [
+                            json.loads(t.model_dump_json()) for t in tool_calls if isinstance(t, ToolCall)
+                        ],
                     }
                 )
                 span.set_attribute("output", output_attr)
@@ -667,16 +687,18 @@ class ChatAgent(ShieldRunnerMixin):
             if tool_calls:
                 content = ""
+            # Filter out string tool calls for CompletionMessage (only keep ToolCall objects)
+            valid_tool_calls = [t for t in tool_calls if isinstance(t, ToolCall)]
             message = CompletionMessage(
                 content=content,
                 stop_reason=stop_reason,
-                tool_calls=tool_calls,
+                tool_calls=valid_tool_calls if valid_tool_calls else None,
             )
             yield AgentTurnResponseStreamChunk(
                 event=AgentTurnResponseEvent(
                     payload=AgentTurnResponseStepCompletePayload(
-                        step_type=StepType.inference.value,
+                        step_type=StepType.inference,
                         step_id=step_id,
                         step_details=InferenceStep(
                             # somewhere deep, we are re-assigning message or closing over some
@@ -686,13 +708,14 @@ class ChatAgent(ShieldRunnerMixin):
                             turn_id=turn_id,
                             model_response=copy.deepcopy(message),
                             started_at=inference_start_time,
-                            completed_at=datetime.now(UTC).isoformat(),
+                            completed_at=datetime.now(UTC),
                         ),
                     )
                 )
             )
-            if n_iter >= self.agent_config.max_infer_iters:
+            max_iters = self.agent_config.max_infer_iters if self.agent_config.max_infer_iters is not None else 10
+            if n_iter >= max_iters:
                 logger.info(f"done with MAX iterations ({n_iter}), exiting.")
                 # NOTE: mark end_of_turn to indicate to client that we are done with the turn
                 # Do not continue the tool call loop after this point
@@ -705,14 +728,16 @@ class ChatAgent(ShieldRunnerMixin):
                 yield message
                 break
-            if len(message.tool_calls) == 0:
+            if not message.tool_calls or len(message.tool_calls) == 0:
                 if stop_reason == StopReason.end_of_turn:
                     # TODO: UPDATE RETURN TYPE TO SEND A TUPLE OF (MESSAGE, ATTACHMENTS)
                     if len(output_attachments) > 0:
                         if isinstance(message.content, list):
-                            message.content += output_attachments
+                            # List invariance - attachments are compatible at runtime
+                            message.content += output_attachments  # type: ignore[arg-type]
                         else:
-                            message.content = [message.content] + output_attachments
+                            # List invariance - attachments are compatible at runtime
+                            message.content = [message.content] + output_attachments  # type: ignore[assignment]
                     yield message
                 else:
                     logger.debug(f"completion message with EOM (iter: {n_iter}): {str(message)}")
@@ -725,11 +750,12 @@ class ChatAgent(ShieldRunnerMixin):
             non_client_tool_calls = []
             # Separate client and non-client tool calls
-            for tool_call in message.tool_calls:
-                if tool_call.tool_name in client_tools:
-                    client_tool_calls.append(tool_call)
-                else:
-                    non_client_tool_calls.append(tool_call)
+            if message.tool_calls:
+                for tool_call in message.tool_calls:
+                    if tool_call.tool_name in client_tools:
+                        client_tool_calls.append(tool_call)
+                    else:
+                        non_client_tool_calls.append(tool_call)
             # Process non-client tool calls first
             for tool_call in non_client_tool_calls:
@@ -737,7 +763,7 @@ class ChatAgent(ShieldRunnerMixin):
                 yield AgentTurnResponseStreamChunk(
                     event=AgentTurnResponseEvent(
                         payload=AgentTurnResponseStepStartPayload(
-                            step_type=StepType.tool_execution.value,
+                            step_type=StepType.tool_execution,
                             step_id=step_id,
                         )
                     )
@@ -746,7 +772,7 @@ class ChatAgent(ShieldRunnerMixin):
                 yield AgentTurnResponseStreamChunk(
                     event=AgentTurnResponseEvent(
                         payload=AgentTurnResponseStepProgressPayload(
-                            step_type=StepType.tool_execution.value,
+                            step_type=StepType.tool_execution,
                             step_id=step_id,
                             delta=ToolCallDelta(
                                 parse_status=ToolCallParseStatus.in_progress,
@@ -766,7 +792,7 @@ class ChatAgent(ShieldRunnerMixin):
                     if self.telemetry_enabled
                     else {},
                 ) as span:
-                    tool_execution_start_time = datetime.now(UTC).isoformat()
+                    tool_execution_start_time = datetime.now(UTC)
                     tool_result = await self.execute_tool_call_maybe(
                         session_id,
                         tool_call,
@@ -796,14 +822,14 @@ class ChatAgent(ShieldRunnerMixin):
                             )
                         ],
                         started_at=tool_execution_start_time,
-                        completed_at=datetime.now(UTC).isoformat(),
+                        completed_at=datetime.now(UTC),
                     )
                     # Yield the step completion event
                     yield AgentTurnResponseStreamChunk(
                         event=AgentTurnResponseEvent(
                             payload=AgentTurnResponseStepCompletePayload(
-                                step_type=StepType.tool_execution.value,
+                                step_type=StepType.tool_execution,
                                 step_id=step_id,
                                 step_details=tool_execution_step,
                             )
@@ -833,7 +859,7 @@ class ChatAgent(ShieldRunnerMixin):
                     turn_id=turn_id,
                     tool_calls=client_tool_calls,
                     tool_responses=[],
-                    started_at=datetime.now(UTC).isoformat(),
+                    started_at=datetime.now(UTC),
                 ),
             )
@@ -868,19 +894,20 @@ class ChatAgent(ShieldRunnerMixin):
         toolgroup_to_args = toolgroup_to_args or {}
-        tool_name_to_def = {}
+        tool_name_to_def: dict[str, ToolDefinition] = {}
         tool_name_to_args = {}
-        for tool_def in self.agent_config.client_tools:
-            if tool_name_to_def.get(tool_def.name, None):
-                raise ValueError(f"Tool {tool_def.name} already exists")
+        if self.agent_config.client_tools:
+            for tool_def in self.agent_config.client_tools:
+                if tool_name_to_def.get(tool_def.name, None):
+                    raise ValueError(f"Tool {tool_def.name} already exists")
                 # Use input_schema from ToolDef directly
                 tool_name_to_def[tool_def.name] = ToolDefinition(
                     tool_name=tool_def.name,
                     description=tool_def.description,
                     input_schema=tool_def.input_schema,
                 )
         for toolgroup_name_with_maybe_tool_name in agent_config_toolgroups:
             toolgroup_name, input_tool_name = self._parse_toolgroup_name(toolgroup_name_with_maybe_tool_name)
             tools = await self.tool_groups_api.list_tools(toolgroup_id=toolgroup_name)
@@ -908,15 +935,17 @@ class ChatAgent(ShieldRunnerMixin):
                 else:
                     identifier = None
-                if tool_name_to_def.get(identifier, None):
-                    raise ValueError(f"Tool {identifier} already exists")
                 if identifier:
-                    tool_name_to_def[identifier] = ToolDefinition(
-                        tool_name=identifier,
+                    # Convert BuiltinTool to string for dictionary key
+                    identifier_str = identifier.value if isinstance(identifier, BuiltinTool) else identifier
+                    if tool_name_to_def.get(identifier_str, None):
+                        raise ValueError(f"Tool {identifier_str} already exists")
+                    tool_name_to_def[identifier_str] = ToolDefinition(
+                        tool_name=identifier_str,
                         description=tool_def.description,
                         input_schema=tool_def.input_schema,
                     )
-                    tool_name_to_args[identifier] = toolgroup_to_args.get(toolgroup_name, {})
+                    tool_name_to_args[identifier_str] = toolgroup_to_args.get(toolgroup_name, {})
         self.tool_defs, self.tool_name_to_args = (
             list(tool_name_to_def.values()),
@@ -966,14 +995,17 @@ class ChatAgent(ShieldRunnerMixin):
         except json.JSONDecodeError as e:
             raise ValueError(f"Failed to parse arguments for tool call: {tool_call.arguments}") from e
-        result = await self.tool_runtime_api.invoke_tool(
-            tool_name=tool_name_str,
-            kwargs={
-                "session_id": session_id,
-                # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
-                **args,
-                **self.tool_name_to_args.get(tool_name_str, {}),
-            },
+        result = cast(
+            ToolInvocationResult,
+            await self.tool_runtime_api.invoke_tool(
+                tool_name=tool_name_str,
+                kwargs={
+                    "session_id": session_id,
+                    # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
+                    **args,
+                    **self.tool_name_to_args.get(tool_name_str, {}),
+                },
+            ),
         )
         logger.debug(f"tool call {tool_name_str} completed with result: {result}")
         return result
@@ -1017,7 +1049,7 @@ def _interpret_content_as_attachment(
         snippet = match.group(1)
         data = json.loads(snippet)
         return Attachment(
-            url=URL(uri="file://" + data["filepath"]),
+            content=URL(uri="file://" + data["filepath"]),
            mime_type=data["mimetype"],
        )
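Note: the identifier_str normalization above is the standard fix for mixing enum members and plain strings as dict keys: convert to one canonical representation before lookup. A small sketch of the pattern; the BuiltinTool member shown is an illustrative assumption:

    # Sketch of normalizing enum-or-string identifiers before use as dict keys;
    # the BuiltinTool member here is an illustrative assumption.
    from enum import Enum

    class BuiltinTool(Enum):
        brave_search = "brave_search"

    def normalize(identifier: "BuiltinTool | str") -> str:
        return identifier.value if isinstance(identifier, BuiltinTool) else identifier

    tools: dict[str, str] = {}
    for ident in (BuiltinTool.brave_search, "my_custom_tool"):
        key = normalize(ident)
        if key in tools:
            raise ValueError(f"Tool {key} already exists")
        tools[key] = "registered"

    print(sorted(tools))  # ['brave_search', 'my_custom_tool']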
@@ -21,6 +21,7 @@ from llama_stack.apis.agents import (
     Document,
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
+    OpenAIDeleteResponseObject,
     OpenAIResponseInput,
     OpenAIResponseInputTool,
     OpenAIResponseObject,
@@ -141,7 +142,7 @@ class MetaReferenceAgentsImpl(Agents):
             persistence_store=(
                 self.persistence_store if agent_info.enable_session_persistence else self.in_memory_store
             ),
-            created_at=agent_info.created_at,
+            created_at=agent_info.created_at.isoformat(),
             policy=self.policy,
             telemetry_enabled=self.telemetry_enabled,
         )
@@ -163,9 +164,9 @@ class MetaReferenceAgentsImpl(Agents):
         agent_id: str,
         session_id: str,
         messages: list[UserMessage | ToolResponseMessage],
-        toolgroups: list[AgentToolGroup] | None = None,
-        documents: list[Document] | None = None,
         stream: bool | None = False,
+        documents: list[Document] | None = None,
+        toolgroups: list[AgentToolGroup] | None = None,
         tool_config: ToolConfig | None = None,
     ) -> AsyncGenerator:
         request = AgentTurnCreateRequest(
@@ -221,6 +222,8 @@ class MetaReferenceAgentsImpl(Agents):
     async def get_agents_turn(self, agent_id: str, session_id: str, turn_id: str) -> Turn:
         agent = await self._get_agent_impl(agent_id)
         turn = await agent.storage.get_session_turn(session_id, turn_id)
+        if turn is None:
+            raise ValueError(f"Turn {turn_id} not found in session {session_id}")
         return turn
     async def get_agents_step(self, agent_id: str, session_id: str, turn_id: str, step_id: str) -> AgentStepResponse:
@@ -232,13 +235,15 @@ class MetaReferenceAgentsImpl(Agents):
     async def get_agents_session(
         self,
-        agent_id: str,
         session_id: str,
+        agent_id: str,
         turn_ids: list[str] | None = None,
     ) -> Session:
         agent = await self._get_agent_impl(agent_id)
         session_info = await agent.storage.get_session_info(session_id)
+        if session_info is None:
+            raise ValueError(f"Session {session_id} not found")
         turns = await agent.storage.get_session_turns(session_id)
         if turn_ids:
             turns = [turn for turn in turns if turn.turn_id in turn_ids]
@@ -249,7 +254,7 @@ class MetaReferenceAgentsImpl(Agents):
             started_at=session_info.started_at,
         )
-    async def delete_agents_session(self, agent_id: str, session_id: str) -> None:
+    async def delete_agents_session(self, session_id: str, agent_id: str) -> None:
         agent = await self._get_agent_impl(agent_id)
         # Delete turns first, then the session
@@ -302,7 +307,7 @@ class MetaReferenceAgentsImpl(Agents):
         agent = Agent(
             agent_id=agent_id,
             agent_config=chat_agent.agent_config,
-            created_at=chat_agent.created_at,
+            created_at=datetime.fromisoformat(chat_agent.created_at),
         )
         return agent
@@ -323,6 +328,7 @@ class MetaReferenceAgentsImpl(Agents):
         self,
         response_id: str,
     ) -> OpenAIResponseObject:
+        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
         return await self.openai_responses_impl.get_openai_response(response_id)
     async def create_openai_response(
@@ -342,7 +348,8 @@ class MetaReferenceAgentsImpl(Agents):
         max_infer_iters: int | None = 10,
         guardrails: list[ResponseGuardrail] | None = None,
     ) -> OpenAIResponseObject:
-        return await self.openai_responses_impl.create_openai_response(
+        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
+        result = await self.openai_responses_impl.create_openai_response(
             input,
             model,
             prompt,
@@ -358,6 +365,7 @@ class MetaReferenceAgentsImpl(Agents):
             max_infer_iters,
             guardrails,
         )
+        return result  # type: ignore[no-any-return]
     async def list_openai_responses(
         self,
@@ -366,6 +374,7 @@ class MetaReferenceAgentsImpl(Agents):
         model: str | None = None,
         order: Order | None = Order.desc,
     ) -> ListOpenAIResponseObject:
+        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
        return await self.openai_responses_impl.list_openai_responses(after, limit, model, order)
    async def list_openai_response_input_items(
@@ -377,9 +386,11 @@ class MetaReferenceAgentsImpl(Agents):
        limit: int | None = 20,
        order: Order | None = Order.desc,
    ) -> ListOpenAIResponseInputItem:
+        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
        return await self.openai_responses_impl.list_openai_response_input_items(
            response_id, after, before, include, limit, order
        )
-    async def delete_openai_response(self, response_id: str) -> None:
+    async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
+        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
        return await self.openai_responses_impl.delete_openai_response(response_id)
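Note: the repeated assert self.openai_responses_impl is not None lines are a narrowing idiom: when an attribute is declared X | None but is guaranteed to be set after initialization, the assert both documents the invariant and lets mypy treat it as X afterwards. A minimal sketch:

    # Minimal sketch of assert-based narrowing for an Optional attribute.
    class ResponsesImpl:
        async def get(self, response_id: str) -> str:
            return f"response:{response_id}"

    class AgentsImpl:
        def __init__(self) -> None:
            # Declared optional because it is wired up in a separate initialize() step.
            self.openai_responses_impl: ResponsesImpl | None = None

        async def get_openai_response(self, response_id: str) -> str:
            assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
            # After the assert, mypy narrows the attribute to ResponsesImpl.
            return await self.openai_responses_impl.get(response_id)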
@@ -6,12 +6,14 @@
 import json
 import uuid
+from dataclasses import dataclass
 from datetime import UTC, datetime
 from llama_stack.apis.agents import AgentConfig, Session, ToolExecutionStep, Turn
 from llama_stack.apis.common.errors import SessionNotFoundError
 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
-from llama_stack.core.access_control.datatypes import AccessRule
+from llama_stack.core.access_control.conditions import User as ProtocolUser
+from llama_stack.core.access_control.datatypes import AccessRule, Action
 from llama_stack.core.datatypes import User
 from llama_stack.core.request_headers import get_authenticated_user
 from llama_stack.log import get_logger
@@ -33,6 +35,15 @@ class AgentInfo(AgentConfig):
     created_at: datetime
+@dataclass
+class SessionResource:
+    """Concrete implementation of ProtectedResource for session access control."""
+    type: str
+    identifier: str
+    owner: ProtocolUser  # Use the protocol type for structural compatibility
 class AgentPersistence:
     def __init__(self, agent_id: str, kvstore: KVStore, policy: list[AccessRule]):
         self.agent_id = agent_id
@@ -53,8 +64,15 @@ class AgentPersistence:
             turns=[],
             identifier=name,  # should this be qualified in any way?
         )
-        if not is_action_allowed(self.policy, "create", session_info, user):
-            raise AccessDeniedError("create", session_info, user)
+        # Only perform access control if we have an authenticated user
+        if user is not None and session_info.identifier is not None:
+            resource = SessionResource(
+                type=session_info.type,
+                identifier=session_info.identifier,
+                owner=user,
+            )
+            if not is_action_allowed(self.policy, Action.CREATE, resource, user):
+                raise AccessDeniedError(Action.CREATE, resource, user)
         await self.kvstore.set(
             key=f"session:{self.agent_id}:{session_id}",
@@ -62,7 +80,7 @@ class AgentPersistence:
         )
         return session_id
-    async def get_session_info(self, session_id: str) -> AgentSessionInfo:
+    async def get_session_info(self, session_id: str) -> AgentSessionInfo | None:
         value = await self.kvstore.get(
             key=f"session:{self.agent_id}:{session_id}",
         )
@@ -83,7 +101,22 @@ class AgentPersistence:
         if not hasattr(session_info, "access_attributes") and not hasattr(session_info, "owner"):
             return True
-        return is_action_allowed(self.policy, "read", session_info, get_authenticated_user())
+        # Get current user - if None, skip access control (e.g., in tests)
+        user = get_authenticated_user()
+        if user is None:
+            return True
+        # Access control requires identifier and owner to be set
+        if session_info.identifier is None or session_info.owner is None:
+            return True
+        # At this point, both identifier and owner are guaranteed to be non-None
+        resource = SessionResource(
+            type=session_info.type,
+            identifier=session_info.identifier,
+            owner=session_info.owner,
+        )
+        return is_action_allowed(self.policy, Action.READ, resource, user)
     async def get_session_if_accessible(self, session_id: str) -> AgentSessionInfo | None:
         """Get session info if the user has access to it. For internal use by sub-session methods."""
@@ -91,7 +91,8 @@ class OpenAIResponsesImpl:
         input: str | list[OpenAIResponseInput],
         previous_response: _OpenAIResponseObjectWithInputAndMessages,
     ):
-        new_input_items = previous_response.input.copy()
+        # Convert Sequence to list for mutation
+        new_input_items = list(previous_response.input)
         new_input_items.extend(previous_response.output)
         if isinstance(input, str):
@@ -107,7 +108,7 @@ class OpenAIResponsesImpl:
         tools: list[OpenAIResponseInputTool] | None,
         previous_response_id: str | None,
         conversation: str | None,
-    ) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam]]:
+    ) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam], ToolContext]:
         """Process input with optional previous response context.
         Returns:
@@ -208,6 +209,9 @@ class OpenAIResponsesImpl:
         messages: list[OpenAIMessageParam],
     ) -> None:
         new_input_id = f"msg_{uuid.uuid4()}"
+        # Type input_items_data as the full OpenAIResponseInput union to avoid list invariance issues
+        input_items_data: list[OpenAIResponseInput] = []
         if isinstance(input, str):
             # synthesize a message from the input string
             input_content = OpenAIResponseInputMessageContentText(text=input)
@@ -219,7 +223,6 @@ class OpenAIResponsesImpl:
             input_items_data = [input_content_item]
         else:
             # we already have a list of messages
-            input_items_data = []
             for input_item in input:
                 if isinstance(input_item, OpenAIResponseMessage):
                     # These may or may not already have an id, so dump to dict, check for id, and add if missing
@@ -251,7 +254,7 @@ class OpenAIResponsesImpl:
         tools: list[OpenAIResponseInputTool] | None = None,
         include: list[str] | None = None,
         max_infer_iters: int | None = 10,
-        guardrails: list[ResponseGuardrailSpec] | None = None,
+        guardrails: list[str | ResponseGuardrailSpec] | None = None,
     ):
         stream = bool(stream)
         text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
@@ -289,16 +292,19 @@ class OpenAIResponsesImpl:
         failed_response = None
         async for stream_chunk in stream_gen:
-            if stream_chunk.type in {"response.completed", "response.incomplete"}:
-                if final_response is not None:
-                    raise ValueError(
-                        "The response stream produced multiple terminal responses! "
-                        f"Earlier response from {final_event_type}"
-                    )
-                final_response = stream_chunk.response
-                final_event_type = stream_chunk.type
-            elif stream_chunk.type == "response.failed":
-                failed_response = stream_chunk.response
+            match stream_chunk.type:
+                case "response.completed" | "response.incomplete":
+                    if final_response is not None:
+                        raise ValueError(
+                            "The response stream produced multiple terminal responses! "
+                            f"Earlier response from {final_event_type}"
+                        )
+                    final_response = stream_chunk.response
+                    final_event_type = stream_chunk.type
+                case "response.failed":
+                    failed_response = stream_chunk.response
+                case _:
+                    pass  # Other event types don't have .response
         if failed_response is not None:
             error_message = (
@@ -326,6 +332,11 @@ class OpenAIResponsesImpl:
         max_infer_iters: int | None = 10,
         guardrail_ids: list[str] | None = None,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
+        # These should never be None when called from create_openai_response (which sets defaults)
+        # but we assert here to help mypy understand the types
+        assert text is not None, "text must not be None"
+        assert max_infer_iters is not None, "max_infer_iters must not be None"
         # Input preprocessing
         all_input, messages, tool_context = await self._process_input_with_previous_response(
             input, tools, previous_response_id, conversation
@@ -368,16 +379,19 @@ class OpenAIResponsesImpl:
         final_response = None
         failed_response = None
-        output_items = []
+        # Type as ConversationItem to avoid list invariance issues
+        output_items: list[ConversationItem] = []
         async for stream_chunk in orchestrator.create_response():
-            if stream_chunk.type in {"response.completed", "response.incomplete"}:
-                final_response = stream_chunk.response
-            elif stream_chunk.type == "response.failed":
-                failed_response = stream_chunk.response
-            if stream_chunk.type == "response.output_item.done":
-                item = stream_chunk.item
-                output_items.append(item)
+            match stream_chunk.type:
+                case "response.completed" | "response.incomplete":
+                    final_response = stream_chunk.response
+                case "response.failed":
+                    failed_response = stream_chunk.response
+                case "response.output_item.done":
+                    item = stream_chunk.item
+                    output_items.append(item)
+                case _:
+                    pass  # Other event types
             # Store and sync before yielding terminal events
             # This ensures the storage/syncing happens even if the consumer breaks after receiving the event
@@ -410,7 +424,8 @@ class OpenAIResponsesImpl:
         self, conversation_id: str, input: str | list[OpenAIResponseInput] | None, output_items: list[ConversationItem]
     ) -> None:
         """Sync content and response messages to the conversation."""
-        conversation_items = []
+        # Type as ConversationItem union to avoid list invariance issues
+        conversation_items: list[ConversationItem] = []
         if isinstance(input, str):
             conversation_items.append(
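Note: the if/elif chains over stream_chunk.type become match statements with or-patterns, and the case _ arm makes the "ignore everything else" branch explicit. A small sketch of the same dispatch shape, with event names mirroring the ones matched above:

    # Small sketch of the match-based dispatch used above for stream event types.
    def classify(event_type: str) -> str:
        match event_type:
            case "response.completed" | "response.incomplete":
                return "terminal"
            case "response.failed":
                return "failed"
            case _:
                return "ignored"  # other event types carry no .response payload

    assert classify("response.completed") == "terminal"
    assert classify("response.failed") == "failed"
    assert classify("response.output_text.delta") == "ignored"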
@@ -111,7 +111,7 @@ class StreamingResponseOrchestrator:
         text: OpenAIResponseText,
         max_infer_iters: int,
         tool_executor,  # Will be the tool execution logic from the main class
-        instructions: str,
+        instructions: str | None,
         safety_api,
         guardrail_ids: list[str] | None = None,
         prompt: OpenAIResponsePrompt | None = None,
@@ -128,7 +128,9 @@ class StreamingResponseOrchestrator:
         self.prompt = prompt
         self.sequence_number = 0
         # Store MCP tool mapping that gets built during tool processing
-        self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = ctx.tool_context.previous_tools or {}
+        self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
+            ctx.tool_context.previous_tools if ctx.tool_context else {}
+        )
         # Track final messages after all tool executions
         self.final_messages: list[OpenAIMessageParam] = []
         # mapping for annotations
@@ -229,7 +231,8 @@ class StreamingResponseOrchestrator:
         params = OpenAIChatCompletionRequestWithExtraBody(
             model=self.ctx.model,
             messages=messages,
-            tools=self.ctx.chat_tools,
+            # Pydantic models are dict-compatible but mypy treats them as distinct types
+            tools=self.ctx.chat_tools,  # type: ignore[arg-type]
             stream=True,
             temperature=self.ctx.temperature,
             response_format=response_format,
@@ -272,7 +275,12 @@ class StreamingResponseOrchestrator:
         # Handle choices with no tool calls
         for choice in current_response.choices:
-            if not (choice.message.tool_calls and self.ctx.response_tools):
+            has_tool_calls = (
+                isinstance(choice.message, OpenAIAssistantMessageParam)
+                and choice.message.tool_calls
+                and self.ctx.response_tools
+            )
+            if not has_tool_calls:
                 output_messages.append(
                     await convert_chat_choice_to_response_message(
                         choice,
@@ -722,7 +730,10 @@ class StreamingResponseOrchestrator:
                             )
                             # Accumulate arguments for final response (only for subsequent chunks)
-                            if not is_new_tool_call:
+                            if not is_new_tool_call and response_tool_call is not None:
+                                # Both should have functions since we're inside the tool_call.function check above
+                                assert response_tool_call.function is not None
+                                assert tool_call.function is not None
                                 response_tool_call.function.arguments = (
                                     response_tool_call.function.arguments or ""
                                 ) + tool_call.function.arguments
@@ -747,10 +758,13 @@ class StreamingResponseOrchestrator:
             for tool_call_index in sorted(chat_response_tool_calls.keys()):
                 tool_call = chat_response_tool_calls[tool_call_index]
                 # Ensure that arguments, if sent back to the inference provider, are not None
-                tool_call.function.arguments = tool_call.function.arguments or "{}"
+                if tool_call.function:
+                    tool_call.function.arguments = tool_call.function.arguments or "{}"
                 tool_call_item_id = tool_call_item_ids[tool_call_index]
-                final_arguments = tool_call.function.arguments
-                tool_call_name = chat_response_tool_calls[tool_call_index].function.name
+                final_arguments: str = tool_call.function.arguments or "{}" if tool_call.function else "{}"
+                func = chat_response_tool_calls[tool_call_index].function
+                tool_call_name = func.name if func else ""
                 # Check if this is an MCP tool call
                 is_mcp_tool = tool_call_name and tool_call_name in self.mcp_tool_to_server
@@ -894,12 +908,11 @@ class StreamingResponseOrchestrator:
         self.sequence_number += 1
         if tool_call.function.name and tool_call.function.name in self.mcp_tool_to_server:
-            item = OpenAIResponseOutputMessageMCPCall(
+            item: OpenAIResponseOutput = OpenAIResponseOutputMessageMCPCall(
                 arguments="",
                 name=tool_call.function.name,
                 id=matching_item_id,
                 server_label=self.mcp_tool_to_server[tool_call.function.name].server_label,
-                status="in_progress",
             )
         elif tool_call.function.name == "web_search":
             item = OpenAIResponseOutputMessageWebSearchToolCall(
@@ -1008,7 +1021,7 @@ class StreamingResponseOrchestrator:
                 description=tool.description,
                 input_schema=tool.input_schema,
             )
-            return convert_tooldef_to_openai_tool(tool_def)
+            return convert_tooldef_to_openai_tool(tool_def)  # type: ignore[return-value]  # Returns dict but ChatCompletionToolParam expects TypedDict
         # Initialize chat_tools if not already set
         if self.ctx.chat_tools is None:
@@ -1016,7 +1029,7 @@ class StreamingResponseOrchestrator:
         for input_tool in tools:
             if input_tool.type == "function":
-                self.ctx.chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
+                self.ctx.chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))  # type: ignore[typeddict-item,arg-type]  # Dict compatible with FunctionDefinition
             elif input_tool.type in WebSearchToolTypes:
                 tool_name = "web_search"
                 # Need to access tool_groups_api from tool_executor
@@ -1055,8 +1068,8 @@ class StreamingResponseOrchestrator:
                 if isinstance(mcp_tool.allowed_tools, list):
                     always_allowed = mcp_tool.allowed_tools
                 elif isinstance(mcp_tool.allowed_tools, AllowedToolsFilter):
-                    always_allowed = mcp_tool.allowed_tools.always
-                    never_allowed = mcp_tool.allowed_tools.never
+                    # AllowedToolsFilter only has tool_names field (not allowed/disallowed)
+                    always_allowed = mcp_tool.allowed_tools.tool_names
                 # Call list_mcp_tools
                 tool_defs = None
@@ -1088,7 +1101,7 @@ class StreamingResponseOrchestrator:
                     openai_tool = convert_tooldef_to_chat_tool(t)
                     if self.ctx.chat_tools is None:
                         self.ctx.chat_tools = []
-                    self.ctx.chat_tools.append(openai_tool)
+                    self.ctx.chat_tools.append(openai_tool)  # type: ignore[arg-type]  # Returns dict but ChatCompletionToolParam expects TypedDict
                     # Add to MCP tool mapping
                     if t.name in self.mcp_tool_to_server:
@@ -1120,13 +1133,17 @@ class StreamingResponseOrchestrator:
         self, output_messages: list[OpenAIResponseOutput]
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         # Handle all mcp tool lists from previous response that are still valid:
-        for tool in self.ctx.tool_context.previous_tool_listings:
-            async for evt in self._reuse_mcp_list_tools(tool, output_messages):
-                yield evt
-        # Process all remaining tools (including MCP tools) and emit streaming events
-        if self.ctx.tool_context.tools_to_process:
-            async for stream_event in self._process_new_tools(self.ctx.tool_context.tools_to_process, output_messages):
-                yield stream_event
+        # tool_context can be None when no tools are provided in the response request
+        if self.ctx.tool_context:
+            for tool in self.ctx.tool_context.previous_tool_listings:
+                async for evt in self._reuse_mcp_list_tools(tool, output_messages):
+                    yield evt
+            # Process all remaining tools (including MCP tools) and emit streaming events
+            if self.ctx.tool_context.tools_to_process:
+                async for stream_event in self._process_new_tools(
+                    self.ctx.tool_context.tools_to_process, output_messages
+                ):
+                    yield stream_event
     def _approval_required(self, tool_name: str) -> bool:
         if tool_name not in self.mcp_tool_to_server:
@@ -1220,7 +1237,7 @@ class StreamingResponseOrchestrator:
             openai_tool = convert_tooldef_to_openai_tool(tool_def)
             if self.ctx.chat_tools is None:
                 self.ctx.chat_tools = []
-            self.ctx.chat_tools.append(openai_tool)
+            self.ctx.chat_tools.append(openai_tool)  # type: ignore[arg-type]  # Returns dict but ChatCompletionToolParam expects TypedDict
         mcp_list_message = OpenAIResponseOutputMessageMCPListTools(
             id=f"mcp_list_{uuid.uuid4()}",
@ -7,6 +7,7 @@
import asyncio import asyncio
import json import json
from collections.abc import AsyncIterator from collections.abc import AsyncIterator
from typing import Any
from llama_stack.apis.agents.openai_responses import ( from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInputToolFileSearch, OpenAIResponseInputToolFileSearch,
@ -22,6 +23,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseObjectStreamResponseWebSearchCallSearching, OpenAIResponseObjectStreamResponseWebSearchCallSearching,
OpenAIResponseOutputMessageFileSearchToolCall, OpenAIResponseOutputMessageFileSearchToolCall,
OpenAIResponseOutputMessageFileSearchToolCallResults, OpenAIResponseOutputMessageFileSearchToolCallResults,
OpenAIResponseOutputMessageMCPCall,
OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseOutputMessageWebSearchToolCall,
) )
from llama_stack.apis.common.content_types import ( from llama_stack.apis.common.content_types import (
@ -67,7 +69,7 @@ class ToolExecutor:
) -> AsyncIterator[ToolExecutionResult]: ) -> AsyncIterator[ToolExecutionResult]:
tool_call_id = tool_call.id tool_call_id = tool_call.id
function = tool_call.function function = tool_call.function
tool_kwargs = json.loads(function.arguments) if function.arguments else {} tool_kwargs = json.loads(function.arguments) if function and function.arguments else {}
if not function or not tool_call_id or not function.name: if not function or not tool_call_id or not function.name:
yield ToolExecutionResult(sequence_number=sequence_number) yield ToolExecutionResult(sequence_number=sequence_number)
@ -84,7 +86,16 @@ class ToolExecutor:
error_exc, result = await self._execute_tool(function.name, tool_kwargs, ctx, mcp_tool_to_server) error_exc, result = await self._execute_tool(function.name, tool_kwargs, ctx, mcp_tool_to_server)
# Emit completion events for tool execution # Emit completion events for tool execution
has_error = error_exc or (result and ((result.error_code and result.error_code > 0) or result.error_message)) has_error = bool(
error_exc
or (
result
and (
((error_code := getattr(result, "error_code", None)) and error_code > 0)
or getattr(result, "error_message", None)
)
)
)
async for event_result in self._emit_completion_events( async for event_result in self._emit_completion_events(
function.name, ctx, sequence_number, output_index, item_id, has_error, mcp_tool_to_server function.name, ctx, sequence_number, output_index, item_id, has_error, mcp_tool_to_server
): ):
@ -101,7 +112,9 @@ class ToolExecutor:
sequence_number=sequence_number, sequence_number=sequence_number,
final_output_message=output_message, final_output_message=output_message,
final_input_message=input_message, final_input_message=input_message,
citation_files=result.metadata.get("citation_files") if result and result.metadata else None, citation_files=(
metadata.get("citation_files") if result and (metadata := getattr(result, "metadata", None)) else None
),
) )
async def _execute_knowledge_search_via_vector_store( async def _execute_knowledge_search_via_vector_store(
@@ -188,8 +201,9 @@ class ToolExecutor:
             citation_files[file_id] = filename

+        # Cast to proper InterleavedContent type (list invariance)
         return ToolInvocationResult(
-            content=content_items,
+            content=content_items,  # type: ignore[arg-type]
             metadata={
                 "document_ids": [r.file_id for r in search_results],
                 "chunks": [r.content[0].text if r.content else "" for r in search_results],
@@ -209,51 +223,60 @@ class ToolExecutor:
     ) -> AsyncIterator[ToolExecutionResult]:
         """Emit progress events for tool execution start."""
         # Emit in_progress event based on tool type (only for tools with specific streaming events)
-        progress_event = None
         if mcp_tool_to_server and function_name in mcp_tool_to_server:
             sequence_number += 1
-            progress_event = OpenAIResponseObjectStreamResponseMcpCallInProgress(
-                item_id=item_id,
-                output_index=output_index,
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseMcpCallInProgress(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
                 sequence_number=sequence_number,
             )
         elif function_name == "web_search":
             sequence_number += 1
-            progress_event = OpenAIResponseObjectStreamResponseWebSearchCallInProgress(
-                item_id=item_id,
-                output_index=output_index,
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseWebSearchCallInProgress(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
                 sequence_number=sequence_number,
             )
         elif function_name == "knowledge_search":
             sequence_number += 1
-            progress_event = OpenAIResponseObjectStreamResponseFileSearchCallInProgress(
-                item_id=item_id,
-                output_index=output_index,
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseFileSearchCallInProgress(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
                 sequence_number=sequence_number,
             )
-        if progress_event:
-            yield ToolExecutionResult(stream_event=progress_event, sequence_number=sequence_number)

         # For web search, emit searching event
         if function_name == "web_search":
             sequence_number += 1
-            searching_event = OpenAIResponseObjectStreamResponseWebSearchCallSearching(
-                item_id=item_id,
-                output_index=output_index,
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseWebSearchCallSearching(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
                 sequence_number=sequence_number,
             )
-            yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number)

         # For file search, emit searching event
         if function_name == "knowledge_search":
             sequence_number += 1
-            searching_event = OpenAIResponseObjectStreamResponseFileSearchCallSearching(
-                item_id=item_id,
-                output_index=output_index,
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseFileSearchCallSearching(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
                 sequence_number=sequence_number,
             )
-            yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number)

     async def _execute_tool(
         self,
@@ -261,7 +284,7 @@ class ToolExecutor:
         tool_kwargs: dict,
         ctx: ChatCompletionContext,
         mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
-    ) -> tuple[Exception | None, any]:
+    ) -> tuple[Exception | None, Any]:
         """Execute the tool and return error exception and result."""
         error_exc = None
         result = None
@@ -284,9 +307,13 @@ class ToolExecutor:
                 kwargs=tool_kwargs,
             )
         elif function_name == "knowledge_search":
-            response_file_search_tool = next(
-                (t for t in ctx.response_tools if isinstance(t, OpenAIResponseInputToolFileSearch)),
-                None,
+            response_file_search_tool = (
+                next(
+                    (t for t in ctx.response_tools if isinstance(t, OpenAIResponseInputToolFileSearch)),
+                    None,
+                )
+                if ctx.response_tools
+                else None
             )
             if response_file_search_tool:
                 # Use vector_stores.search API instead of knowledge_search tool
@@ -322,35 +349,34 @@ class ToolExecutor:
         mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
     ) -> AsyncIterator[ToolExecutionResult]:
         """Emit completion or failure events for tool execution."""
-        completion_event = None
         if mcp_tool_to_server and function_name in mcp_tool_to_server:
             sequence_number += 1
             if has_error:
-                completion_event = OpenAIResponseObjectStreamResponseMcpCallFailed(
+                mcp_failed_event = OpenAIResponseObjectStreamResponseMcpCallFailed(
                     sequence_number=sequence_number,
                 )
+                yield ToolExecutionResult(stream_event=mcp_failed_event, sequence_number=sequence_number)
             else:
-                completion_event = OpenAIResponseObjectStreamResponseMcpCallCompleted(
+                mcp_completed_event = OpenAIResponseObjectStreamResponseMcpCallCompleted(
                     sequence_number=sequence_number,
                 )
+                yield ToolExecutionResult(stream_event=mcp_completed_event, sequence_number=sequence_number)
         elif function_name == "web_search":
             sequence_number += 1
-            completion_event = OpenAIResponseObjectStreamResponseWebSearchCallCompleted(
+            web_completion_event = OpenAIResponseObjectStreamResponseWebSearchCallCompleted(
                 item_id=item_id,
                 output_index=output_index,
                 sequence_number=sequence_number,
             )
+            yield ToolExecutionResult(stream_event=web_completion_event, sequence_number=sequence_number)
         elif function_name == "knowledge_search":
             sequence_number += 1
-            completion_event = OpenAIResponseObjectStreamResponseFileSearchCallCompleted(
+            file_completion_event = OpenAIResponseObjectStreamResponseFileSearchCallCompleted(
                 item_id=item_id,
                 output_index=output_index,
                 sequence_number=sequence_number,
             )
+            yield ToolExecutionResult(stream_event=file_completion_event, sequence_number=sequence_number)
-        if completion_event:
-            yield ToolExecutionResult(stream_event=completion_event, sequence_number=sequence_number)
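Both emitter rewrites above drop the buffer-then-yield shape (`event = None` ... `if event: yield`) in favor of a direct `yield` in each branch, so mypy never sees a possibly-None event and every yield is fully typed. A small sketch of that shape, with a hypothetical result type standing in for ToolExecutionResult:

    from collections.abc import AsyncIterator
    from dataclasses import dataclass

    @dataclass
    class SketchResult:  # hypothetical stand-in for ToolExecutionResult
        stream_event: dict
        sequence_number: int

    async def emit_progress(kind: str, sequence_number: int) -> AsyncIterator[SketchResult]:
        # No shared sentinel variable: each branch yields its own fully-built event.
        if kind == "web_search":
            sequence_number += 1
            yield SketchResult({"type": "web_search.in_progress"}, sequence_number)
        elif kind == "knowledge_search":
            sequence_number += 1
            yield SketchResult({"type": "file_search.in_progress"}, sequence_number)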

     async def _build_result_messages(
         self,
@@ -360,21 +386,18 @@ class ToolExecutor:
         tool_kwargs: dict,
         ctx: ChatCompletionContext,
         error_exc: Exception | None,
-        result: any,
+        result: Any,
         has_error: bool,
         mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
-    ) -> tuple[any, any]:
+    ) -> tuple[Any, Any]:
         """Build output and input messages from tool execution results."""
         from llama_stack.providers.utils.inference.prompt_adapter import (
             interleaved_content_as_str,
         )

         # Build output message
+        message: Any
         if mcp_tool_to_server and function.name in mcp_tool_to_server:
-            from llama_stack.apis.agents.openai_responses import (
-                OpenAIResponseOutputMessageMCPCall,
-            )
             message = OpenAIResponseOutputMessageMCPCall(
                 id=item_id,
                 arguments=function.arguments,
@@ -383,10 +406,14 @@ class ToolExecutor:
             )
             if error_exc:
                 message.error = str(error_exc)
-            elif (result and result.error_code and result.error_code > 0) or (result and result.error_message):
-                message.error = f"Error (code {result.error_code}): {result.error_message}"
-            elif result and result.content:
-                message.output = interleaved_content_as_str(result.content)
+            elif (result and (error_code := getattr(result, "error_code", None)) and error_code > 0) or (
+                result and getattr(result, "error_message", None)
+            ):
+                ec = getattr(result, "error_code", "unknown")
+                em = getattr(result, "error_message", "")
+                message.error = f"Error (code {ec}): {em}"
+            elif result and (content := getattr(result, "content", None)):
+                message.output = interleaved_content_as_str(content)
         else:
             if function.name == "web_search":
                 message = OpenAIResponseOutputMessageWebSearchToolCall(
@@ -401,17 +428,17 @@ class ToolExecutor:
                     queries=[tool_kwargs.get("query", "")],
                     status="completed",
                 )
-                if result and "document_ids" in result.metadata:
+                if result and (metadata := getattr(result, "metadata", None)) and "document_ids" in metadata:
                     message.results = []
-                    for i, doc_id in enumerate(result.metadata["document_ids"]):
-                        text = result.metadata["chunks"][i] if "chunks" in result.metadata else None
-                        score = result.metadata["scores"][i] if "scores" in result.metadata else None
+                    for i, doc_id in enumerate(metadata["document_ids"]):
+                        text = metadata["chunks"][i] if "chunks" in metadata else None
+                        score = metadata["scores"][i] if "scores" in metadata else None
                         message.results.append(
                             OpenAIResponseOutputMessageFileSearchToolCallResults(
                                 file_id=doc_id,
                                 filename=doc_id,
-                                text=text,
-                                score=score,
+                                text=text if text is not None else "",
+                                score=score if score is not None else 0.0,
                                 attributes={},
                             )
                         )
@@ -421,27 +448,32 @@ class ToolExecutor:
                 raise ValueError(f"Unknown tool {function.name} called")

         # Build input message
-        input_message = None
-        if result and result.content:
-            if isinstance(result.content, str):
-                content = result.content
-            elif isinstance(result.content, list):
-                content = []
-                for item in result.content:
+        input_message: OpenAIToolMessageParam | None = None
+        if result and (result_content := getattr(result, "content", None)):
+            # all the mypy contortions here are still unsatisfactory with random Any typing
+            if isinstance(result_content, str):
+                msg_content: str | list[Any] = result_content
+            elif isinstance(result_content, list):
+                content_list: list[Any] = []
+                for item in result_content:
+                    part: Any
                     if isinstance(item, TextContentItem):
                         part = OpenAIChatCompletionContentPartTextParam(text=item.text)
                     elif isinstance(item, ImageContentItem):
                         if item.image.data:
-                            url = f"data:image;base64,{item.image.data}"
+                            url_value = f"data:image;base64,{item.image.data}"
                         else:
-                            url = item.image.url
-                        part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url))
+                            url_value = str(item.image.url) if item.image.url else ""
+                        part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url_value))
                     else:
                         raise ValueError(f"Unknown result content type: {type(item)}")
-                    content.append(part)
+                    content_list.append(part)
+                msg_content = content_list
             else:
-                raise ValueError(f"Unknown result content type: {type(result.content)}")
-            input_message = OpenAIToolMessageParam(content=content, tool_call_id=tool_call_id)
+                raise ValueError(f"Unknown result content type: {type(result_content)}")
+            # OpenAIToolMessageParam accepts str | list[TextParam] but we may have images
+            # This is runtime-safe as the API accepts it, but mypy complains
+            input_message = OpenAIToolMessageParam(content=msg_content, tool_call_id=tool_call_id)  # type: ignore[arg-type]
         else:
             text = str(error_exc) if error_exc else "Tool execution failed"
             input_message = OpenAIToolMessageParam(content=text, tool_call_id=tool_call_id)
@@ -5,6 +5,7 @@
 # the root directory of this source tree.

 from dataclasses import dataclass
+from typing import cast

 from openai.types.chat import ChatCompletionToolParam
 from pydantic import BaseModel
@@ -100,17 +101,19 @@ class ToolContext(BaseModel):
                 if isinstance(tool, OpenAIResponseToolMCP):
                     previous_tools_by_label[tool.server_label] = tool
             # collect tool definitions which are the same in current and previous requests:
-            tools_to_process = []
+            tools_to_process: list[OpenAIResponseInputTool] = []
             matched: dict[str, OpenAIResponseInputToolMCP] = {}
-            for tool in self.current_tools:
+            # Mypy confuses OpenAIResponseInputTool (Input union) with OpenAIResponseTool (output union)
+            # which differ only in MCP type (InputToolMCP vs ToolMCP). Code is correct.
+            for tool in cast(list[OpenAIResponseInputTool], self.current_tools):  # type: ignore[assignment]
                 if isinstance(tool, OpenAIResponseInputToolMCP) and tool.server_label in previous_tools_by_label:
                     previous_tool = previous_tools_by_label[tool.server_label]
                     if previous_tool.allowed_tools == tool.allowed_tools:
                         matched[tool.server_label] = tool
                     else:
-                        tools_to_process.append(tool)
+                        tools_to_process.append(tool)  # type: ignore[arg-type]
                 else:
-                    tools_to_process.append(tool)
+                    tools_to_process.append(tool)  # type: ignore[arg-type]
             # tools that are not the same or were not previously defined need to be processed:
             self.tools_to_process = tools_to_process
             # for all matched definitions, get the mcp_list_tools objects from the previous output:
@@ -119,9 +122,11 @@ class ToolContext(BaseModel):
             ]
             # reconstruct the tool to server mappings that can be reused:
             for listing in self.previous_tool_listings:
+                # listing is OpenAIResponseOutputMessageMCPListTools which has tools: list[MCPListToolsTool]
                 definition = matched[listing.server_label]
-                for tool in listing.tools:
-                    self.previous_tools[tool.name] = definition
+                for mcp_tool in listing.tools:
+                    # mcp_tool is MCPListToolsTool which has a name: str field
+                    self.previous_tools[mcp_tool.name] = definition

     def available_tools(self) -> list[OpenAIResponseTool]:
         if not self.current_tools:
@@ -139,6 +144,8 @@ class ToolContext(BaseModel):
                     server_label=tool.server_label,
                     allowed_tools=tool.allowed_tools,
                 )
+            # Exhaustive check - all tool types should be handled above
+            raise AssertionError(f"Unexpected tool type: {type(tool)}")
         return [convert_tool(tool) for tool in self.current_tools]
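The trailing `raise AssertionError` turns `convert_tool` into an exhaustiveness check: if a new member is added to the tool union and not handled, the failure is loud instead of a silent implicit `return None`. The idiom in isolation, with placeholder classes:

    class SearchTool: ...
    class McpTool: ...

    def describe(tool: SearchTool | McpTool) -> str:
        if isinstance(tool, SearchTool):
            return "search"
        if isinstance(tool, McpTool):
            return "mcp"
        # Unreachable while every union member is handled above;
        # fails loudly if a new member is added later.
        raise AssertionError(f"Unexpected tool type: {type(tool)}")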
@@ -7,6 +7,7 @@
 import asyncio
 import re
 import uuid
+from collections.abc import Sequence

 from llama_stack.apis.agents.agents import ResponseGuardrailSpec
 from llama_stack.apis.agents.openai_responses import (
@@ -71,14 +72,14 @@ async def convert_chat_choice_to_response_message(
     return OpenAIResponseMessage(
         id=message_id or f"msg_{uuid.uuid4()}",
-        content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=annotations)],
+        content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=list(annotations))],
         status="completed",
         role="assistant",
     )


 async def convert_response_content_to_chat_content(
-    content: (str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]),
+    content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent],
 ) -> str | list[OpenAIChatCompletionContentPartParam]:
     """
     Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.
@@ -88,7 +89,8 @@ async def convert_response_content_to_chat_content(
     if isinstance(content, str):
         return content

-    converted_parts = []
+    # Type with union to avoid list invariance issues
+    converted_parts: list[OpenAIChatCompletionContentPartParam] = []
     for content_part in content:
         if isinstance(content_part, OpenAIResponseInputMessageContentText):
             converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
@@ -158,9 +160,11 @@ async def convert_response_input_to_chat_messages(
                 ),
             )
             messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call]))
+            # Output can be None, use empty string as fallback
+            output_content = input_item.output if input_item.output is not None else ""
             messages.append(
                 OpenAIToolMessageParam(
-                    content=input_item.output,
+                    content=output_content,
                     tool_call_id=input_item.id,
                 )
             )
@@ -172,7 +176,8 @@ async def convert_response_input_to_chat_messages(
         ):
             # these are handled by the responses impl itself and not pass through to chat completions
             pass
-        else:
+        elif isinstance(input_item, OpenAIResponseMessage):
+            # Narrow type to OpenAIResponseMessage which has content and role attributes
             content = await convert_response_content_to_chat_content(input_item.content)
             message_type = await get_message_type_by_role(input_item.role)
             if message_type is None:
@@ -191,7 +196,8 @@ async def convert_response_input_to_chat_messages(
                     last_user_content = getattr(last_user_msg, "content", None)
                     if last_user_content == content:
                         continue  # Skip duplicate user message
-            messages.append(message_type(content=content))
+            # Dynamic message type call - different message types have different content expectations
+            messages.append(message_type(content=content))  # type: ignore[call-arg,arg-type]
     if len(tool_call_results):
         # Check if unpaired function_call_outputs reference function_calls from previous messages
         if previous_messages:
@@ -237,8 +243,11 @@ async def convert_response_text_to_chat_response_format(
     if text.format["type"] == "json_object":
         return OpenAIResponseFormatJSONObject()
     if text.format["type"] == "json_schema":
+        # Assert name exists for json_schema format
+        assert text.format.get("name"), "json_schema format requires a name"
+        schema_name: str = text.format["name"]  # type: ignore[assignment]
         return OpenAIResponseFormatJSONSchema(
-            json_schema=OpenAIJSONSchema(name=text.format["name"], schema=text.format["schema"])
+            json_schema=OpenAIJSONSchema(name=schema_name, schema=text.format["schema"])
         )
     raise ValueError(f"Unsupported text format: {text.format}")
@@ -251,7 +260,7 @@ async def get_message_type_by_role(role: str) -> type[OpenAIMessageParam] | None
         "assistant": OpenAIAssistantMessageParam,
         "developer": OpenAIDeveloperMessageParam,
     }
-    return role_to_type.get(role)
+    return role_to_type.get(role)  # type: ignore[return-value]  # Pydantic models use ModelMetaclass


 def _extract_citations_from_text(
@@ -320,7 +329,8 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[
     # Look up shields to get their provider_resource_id (actual model ID)
     model_ids = []
-    shields_list = await safety_api.routing_table.list_shields()
+    # TODO: list_shields not in Safety interface but available at runtime via API routing
+    shields_list = await safety_api.routing_table.list_shields()  # type: ignore[attr-defined]

     for guardrail_id in guardrail_ids:
         matching_shields = [shield for shield in shields_list.data if shield.identifier == guardrail_id]
@@ -337,7 +347,9 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[
         for result in response.results:
             if result.flagged:
                 message = result.user_message or "Content blocked by safety guardrails"
-                flagged_categories = [cat for cat, flagged in result.categories.items() if flagged]
+                flagged_categories = (
+                    [cat for cat, flagged in result.categories.items() if flagged] if result.categories else []
+                )
                 violation_type = result.metadata.get("violation_type", []) if result.metadata else []

                 if flagged_categories:
@@ -347,6 +359,9 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[
                 return message

+    # No violations found
+    return None


 def extract_guardrail_ids(guardrails: list | None) -> list[str]:
     """Extract guardrail IDs from guardrails parameter, handling both string IDs and ResponseGuardrailSpec objects."""
@@ -6,7 +6,7 @@
 import asyncio

-from llama_stack.apis.inference import Message
+from llama_stack.apis.inference import OpenAIMessageParam
 from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel
 from llama_stack.core.telemetry import tracing
 from llama_stack.log import get_logger
@@ -31,7 +31,7 @@ class ShieldRunnerMixin:
         self.input_shields = input_shields
         self.output_shields = output_shields

-    async def run_multiple_shields(self, messages: list[Message], identifiers: list[str]) -> None:
+    async def run_multiple_shields(self, messages: list[OpenAIMessageParam], identifiers: list[str]) -> None:
         async def run_shield_with_span(identifier: str):
             async with tracing.span(f"run_shield_{identifier}"):
                 return await self.safety_api.run_shield(
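`run_multiple_shields` wraps each shield call in its own tracing span via `run_shield_with_span`; a common way to fan such per-shield coroutines out concurrently is `asyncio.gather`, sketched here with a placeholder check function (an assumed shape, not the mixin's exact code):

    import asyncio

    async def run_check(identifier: str) -> str:
        # placeholder for one run_shield call wrapped in a tracing span
        await asyncio.sleep(0)
        return identifier

    async def run_all(identifiers: list[str]) -> list[str]:
        # one task per shield; gather returns results in input order
        return await asyncio.gather(*(run_check(i) for i in identifiers))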
@@ -33,4 +33,5 @@ class AnthropicInferenceAdapter(OpenAIMixin):
         return "https://api.anthropic.com/v1"

     async def list_provider_model_ids(self) -> Iterable[str]:
-        return [m.id async for m in AsyncAnthropic(api_key=self.get_api_key()).models.list()]
+        api_key = self._get_api_key_from_config_or_provider_data()
+        return [m.id async for m in AsyncAnthropic(api_key=api_key).models.list()]
@@ -33,10 +33,11 @@ class DatabricksInferenceAdapter(OpenAIMixin):

     async def list_provider_model_ids(self) -> Iterable[str]:
         # Filter out None values from endpoint names
+        api_token = self._get_api_key_from_config_or_provider_data()
         return [
             endpoint.name  # type: ignore[misc]
             for endpoint in WorkspaceClient(
-                host=self.config.url, token=self.get_api_key()
+                host=self.config.url, token=api_token
             ).serving_endpoints.list()  # TODO: this is not async
         ]
@@ -128,7 +128,9 @@ class LiteLLMOpenAIMixin(
         return schema

     async def _get_params(self, request: ChatCompletionRequest) -> dict:
-        input_dict = {}
+        from typing import Any
+
+        input_dict: dict[str, Any] = {}

         input_dict["messages"] = [
             await convert_message_to_openai_dict_new(m, download_images=self.download_images) for m in request.messages
@@ -139,30 +141,27 @@ class LiteLLMOpenAIMixin(
                     f"Unsupported response format: {type(fmt)}. Only JsonSchemaResponseFormat is supported."
                 )

-            fmt = fmt.json_schema
-            name = fmt["title"]
-            del fmt["title"]
-            fmt["additionalProperties"] = False
+            # Convert to dict for manipulation
+            fmt_dict = dict(fmt.json_schema)
+            name = fmt_dict["title"]
+            del fmt_dict["title"]
+            fmt_dict["additionalProperties"] = False

             # Apply additionalProperties: False recursively to all objects
-            fmt = self._add_additional_properties_recursive(fmt)
+            fmt_dict = self._add_additional_properties_recursive(fmt_dict)

             input_dict["response_format"] = {
                 "type": "json_schema",
                 "json_schema": {
                     "name": name,
-                    "schema": fmt,
+                    "schema": fmt_dict,
                     "strict": self.json_schema_strict,
                 },
             }
         if request.tools:
             input_dict["tools"] = [convert_tooldef_to_openai_tool(tool) for tool in request.tools]
-            if request.tool_config.tool_choice:
-                input_dict["tool_choice"] = (
-                    request.tool_config.tool_choice.value
-                    if isinstance(request.tool_config.tool_choice, ToolChoice)
-                    else request.tool_config.tool_choice
-                )
+            if request.tool_config and (tool_choice := request.tool_config.tool_choice):
+                input_dict["tool_choice"] = tool_choice.value if isinstance(tool_choice, ToolChoice) else tool_choice

         return {
             "model": request.model,
@@ -176,10 +175,10 @@ class LiteLLMOpenAIMixin(
     def get_api_key(self) -> str:
         provider_data = self.get_request_provider_data()
         key_field = self.provider_data_api_key_field
-        if provider_data and getattr(provider_data, key_field, None):
-            api_key = getattr(provider_data, key_field)
-        else:
-            api_key = self.api_key_from_config
+        if provider_data and key_field and (api_key := getattr(provider_data, key_field, None)):
+            return str(api_key)  # type: ignore[no-any-return]  # getattr returns Any, can't narrow without runtime type inspection
+
+        api_key = self.api_key_from_config
         if not api_key:
             raise ValueError(
                 "API key is not set. Please provide a valid API key in the "
@@ -192,7 +191,13 @@ class LiteLLMOpenAIMixin(
         self,
         params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
+        if not self.model_store:
+            raise ValueError("Model store is not initialized")
         model_obj = await self.model_store.get_model(params.model)
+        if model_obj.provider_resource_id is None:
+            raise ValueError(f"Model {params.model} has no provider_resource_id")
+        provider_resource_id = model_obj.provider_resource_id

         # Convert input to list if it's a string
         input_list = [params.input] if isinstance(params.input, str) else params.input
@@ -200,7 +205,7 @@ class LiteLLMOpenAIMixin(
         # Call litellm embedding function
         # litellm.drop_params = True
         response = litellm.embedding(
-            model=self.get_litellm_model_name(model_obj.provider_resource_id),
+            model=self.get_litellm_model_name(provider_resource_id),
            input=input_list,
             api_key=self.get_api_key(),
             api_base=self.api_base,
@@ -217,7 +222,7 @@ class LiteLLMOpenAIMixin(

         return OpenAIEmbeddingsResponse(
             data=data,
-            model=model_obj.provider_resource_id,
+            model=provider_resource_id,
             usage=usage,
         )

@@ -225,10 +230,16 @@ class LiteLLMOpenAIMixin(
         self,
         params: OpenAICompletionRequestWithExtraBody,
     ) -> OpenAICompletion:
+        if not self.model_store:
+            raise ValueError("Model store is not initialized")
         model_obj = await self.model_store.get_model(params.model)
+        if model_obj.provider_resource_id is None:
+            raise ValueError(f"Model {params.model} has no provider_resource_id")
+        provider_resource_id = model_obj.provider_resource_id

         request_params = await prepare_openai_completion_params(
-            model=self.get_litellm_model_name(model_obj.provider_resource_id),
+            model=self.get_litellm_model_name(provider_resource_id),
             prompt=params.prompt,
             best_of=params.best_of,
             echo=params.echo,
@@ -249,7 +260,8 @@ class LiteLLMOpenAIMixin(
             api_key=self.get_api_key(),
             api_base=self.api_base,
         )
-        return await litellm.atext_completion(**request_params)
+        # LiteLLM returns compatible type but mypy can't verify external library
+        return await litellm.atext_completion(**request_params)  # type: ignore[no-any-return]  # external lib lacks type stubs

     async def openai_chat_completion(
         self,
@@ -265,10 +277,16 @@ class LiteLLMOpenAIMixin(
             elif "include_usage" not in stream_options:
                 stream_options = {**stream_options, "include_usage": True}

+        if not self.model_store:
+            raise ValueError("Model store is not initialized")
         model_obj = await self.model_store.get_model(params.model)
+        if model_obj.provider_resource_id is None:
+            raise ValueError(f"Model {params.model} has no provider_resource_id")
+        provider_resource_id = model_obj.provider_resource_id

         request_params = await prepare_openai_completion_params(
-            model=self.get_litellm_model_name(model_obj.provider_resource_id),
+            model=self.get_litellm_model_name(provider_resource_id),
             messages=params.messages,
             frequency_penalty=params.frequency_penalty,
             function_call=params.function_call,
@@ -294,7 +312,8 @@ class LiteLLMOpenAIMixin(
             api_key=self.get_api_key(),
             api_base=self.api_base,
         )
-        return await litellm.acompletion(**request_params)
+        # LiteLLM returns compatible type but mypy can't verify external library
+        return await litellm.acompletion(**request_params)  # type: ignore[no-any-return]  # external lib lacks type stubs

     async def check_model_availability(self, model: str) -> bool:
         """
@@ -161,8 +161,10 @@ def get_sampling_strategy_options(params: SamplingParams) -> dict:
     if isinstance(params.strategy, GreedySamplingStrategy):
         options["temperature"] = 0.0
     elif isinstance(params.strategy, TopPSamplingStrategy):
-        options["temperature"] = params.strategy.temperature
-        options["top_p"] = params.strategy.top_p
+        if params.strategy.temperature is not None:
+            options["temperature"] = params.strategy.temperature
+        if params.strategy.top_p is not None:
+            options["top_p"] = params.strategy.top_p
     elif isinstance(params.strategy, TopKSamplingStrategy):
         options["top_k"] = params.strategy.top_k
     else:
@@ -192,12 +194,12 @@ def get_sampling_options(params: SamplingParams | None) -> dict:

 def text_from_choice(choice) -> str:
     if hasattr(choice, "delta") and choice.delta:
-        return choice.delta.content
+        return choice.delta.content  # type: ignore[no-any-return]  # external OpenAI types lack precise annotations

     if hasattr(choice, "message"):
-        return choice.message.content
+        return choice.message.content  # type: ignore[no-any-return]  # external OpenAI types lack precise annotations

-    return choice.text
+    return choice.text  # type: ignore[no-any-return]  # external OpenAI types lack precise annotations


 def get_stop_reason(finish_reason: str) -> StopReason:
@@ -216,7 +218,7 @@ def convert_openai_completion_logprobs(
 ) -> list[TokenLogProbs] | None:
     if not logprobs:
         return None
-    if hasattr(logprobs, "top_logprobs"):
+    if hasattr(logprobs, "top_logprobs") and logprobs.top_logprobs:
         return [TokenLogProbs(logprobs_by_token=x) for x in logprobs.top_logprobs]

     # Together supports logprobs with top_k=1 only. This means for each token position,
@@ -236,7 +238,7 @@ def convert_openai_completion_logprobs_stream(text: str, logprobs: float | OpenA
     if isinstance(logprobs, float):
         # Adapt response from Together CompletionChoicesChunk
         return [TokenLogProbs(logprobs_by_token={text: logprobs})]
-    if hasattr(logprobs, "top_logprobs"):
+    if hasattr(logprobs, "top_logprobs") and logprobs.top_logprobs:
         return [TokenLogProbs(logprobs_by_token=x) for x in logprobs.top_logprobs]
     return None

@@ -245,23 +247,24 @@ def process_completion_response(
     response: OpenAICompatCompletionResponse,
 ) -> CompletionResponse:
     choice = response.choices[0]
+    text = choice.text or ""
     # drop suffix <eot_id> if present and return stop reason as end of turn
-    if choice.text.endswith("<|eot_id|>"):
+    if text.endswith("<|eot_id|>"):
         return CompletionResponse(
             stop_reason=StopReason.end_of_turn,
-            content=choice.text[: -len("<|eot_id|>")],
+            content=text[: -len("<|eot_id|>")],
             logprobs=convert_openai_completion_logprobs(choice.logprobs),
         )
     # drop suffix <eom_id> if present and return stop reason as end of message
-    if choice.text.endswith("<|eom_id|>"):
+    if text.endswith("<|eom_id|>"):
         return CompletionResponse(
             stop_reason=StopReason.end_of_message,
-            content=choice.text[: -len("<|eom_id|>")],
+            content=text[: -len("<|eom_id|>")],
             logprobs=convert_openai_completion_logprobs(choice.logprobs),
         )
     return CompletionResponse(
-        stop_reason=get_stop_reason(choice.finish_reason),
-        content=choice.text,
+        stop_reason=get_stop_reason(choice.finish_reason or "stop"),
+        content=text,
         logprobs=convert_openai_completion_logprobs(choice.logprobs),
     )
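`process_completion_response` now defaults the Optional fields once up front (`text = choice.text or ""`, `finish_reason or "stop"`) and works with concrete strings afterwards. The normalize-once shape, reduced to a standalone helper:

    def strip_sentinel(text: str | None) -> str:
        safe_text = text or ""  # default the Optional once, then use plain str
        for sentinel in ("<|eot_id|>", "<|eom_id|>"):
            if safe_text.endswith(sentinel):
                return safe_text[: -len(sentinel)]
        return safe_text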
@@ -272,10 +275,10 @@ def process_chat_completion_response(
 ) -> ChatCompletionResponse:
     choice = response.choices[0]
     if choice.finish_reason == "tool_calls":
-        if not choice.message or not choice.message.tool_calls:
+        if not hasattr(choice, "message") or not choice.message or not choice.message.tool_calls:  # type: ignore[attr-defined]  # OpenAICompatCompletionChoice is runtime duck-typed
             raise ValueError("Tool calls are not present in the response")

-        tool_calls = [convert_tool_call(tool_call) for tool_call in choice.message.tool_calls]
+        tool_calls = [convert_tool_call(tool_call) for tool_call in choice.message.tool_calls]  # type: ignore[attr-defined]  # OpenAICompatCompletionChoice is runtime duck-typed
         if any(isinstance(tool_call, UnparseableToolCall) for tool_call in tool_calls):
             # If we couldn't parse a tool call, jsonify the tool calls and return them
             return ChatCompletionResponse(
@@ -287,9 +290,11 @@ def process_chat_completion_response(
             )
         else:
             # Otherwise, return tool calls as normal
+            # Filter to only valid ToolCall objects
+            valid_tool_calls = [tc for tc in tool_calls if isinstance(tc, ToolCall)]
             return ChatCompletionResponse(
                 completion_message=CompletionMessage(
-                    tool_calls=tool_calls,
+                    tool_calls=valid_tool_calls,
                     stop_reason=StopReason.end_of_turn,
                     # Content is not optional
                     content="",
@@ -299,7 +304,7 @@ def process_chat_completion_response(

     # TODO: This does not work well with tool calls for vLLM remote provider
     # Ref: https://github.com/meta-llama/llama-stack/issues/1058
-    raw_message = decode_assistant_message(text_from_choice(choice), get_stop_reason(choice.finish_reason))
+    raw_message = decode_assistant_message(text_from_choice(choice), get_stop_reason(choice.finish_reason or "stop"))

     # NOTE: If we do not set tools in chat-completion request, we should not
     # expect the ToolCall in the response. Instead, we should return the raw
@@ -324,8 +329,8 @@ def process_chat_completion_response(
     return ChatCompletionResponse(
         completion_message=CompletionMessage(
-            content=raw_message.content,
-            stop_reason=raw_message.stop_reason,
+            content=raw_message.content,  # type: ignore[arg-type]  # decode_assistant_message returns Union[str, InterleavedContent]
+            stop_reason=raw_message.stop_reason or StopReason.end_of_turn,
             tool_calls=raw_message.tool_calls,
         ),
         logprobs=None,
@@ -448,7 +453,7 @@ async def process_chat_completion_stream_response(
         )

     # parse tool calls and report errors
-    message = decode_assistant_message(buffer, stop_reason)
+    message = decode_assistant_message(buffer, stop_reason or StopReason.end_of_turn)
     parsed_tool_calls = len(message.tool_calls) > 0

     if ipython and not parsed_tool_calls:
@@ -463,7 +468,7 @@ async def process_chat_completion_stream_response(
             )
         )

-    request_tools = {t.tool_name: t for t in request.tools}
+    request_tools = {t.tool_name: t for t in (request.tools or [])}
     for tool_call in message.tool_calls:
         if tool_call.tool_name in request_tools:
             yield ChatCompletionResponseStreamChunk(
@@ -525,7 +530,7 @@ async def convert_message_to_openai_dict(message: Message, download: bool = Fals
     }
     if hasattr(message, "tool_calls") and message.tool_calls:
-        result["tool_calls"] = []
+        tool_calls_list = []
         for tc in message.tool_calls:
             # The tool.tool_name can be a str or a BuiltinTool enum. If
             # it's the latter, convert to a string.
@@ -533,7 +538,7 @@ async def convert_message_to_openai_dict(message: Message, download: bool = Fals
             if isinstance(tool_name, BuiltinTool):
                 tool_name = tool_name.value

-            result["tool_calls"].append(
+            tool_calls_list.append(
                 {
                     "id": tc.call_id,
                     "type": "function",
@@ -543,6 +548,7 @@ async def convert_message_to_openai_dict(message: Message, download: bool = Fals
                     },
                 }
             )
+        result["tool_calls"] = tool_calls_list  # type: ignore[assignment]  # dict allows Any value, stricter type expected

     return result
@@ -608,7 +614,7 @@ async def convert_message_to_openai_dict_new(
                 ),
             )
         elif isinstance(content_, list):
-            return [await impl(item) for item in content_]
+            return [await impl(item) for item in content_]  # type: ignore[misc]  # recursive list comprehension confuses mypy's type narrowing
         else:
             raise ValueError(f"Unsupported content type: {type(content_)}")

@@ -620,7 +626,7 @@ async def convert_message_to_openai_dict_new(
         else:
             return [ret]

-    out: OpenAIChatCompletionMessage = None
+    out: OpenAIChatCompletionMessage
     if isinstance(message, UserMessage):
         out = OpenAIChatCompletionUserMessage(
             role="user",
@@ -636,7 +642,7 @@ async def convert_message_to_openai_dict_new(
                 ),
                 type="function",
             )
-            for tool in message.tool_calls
+            for tool in (message.tool_calls or [])
         ]
         params = {}
         if tool_calls:
@@ -644,18 +650,18 @@ async def convert_message_to_openai_dict_new(
         out = OpenAIChatCompletionAssistantMessage(
             role="assistant",
             content=await _convert_message_content(message.content),
-            **params,
+            **params,  # type: ignore[typeddict-item]  # tool_calls dict expansion conflicts with TypedDict optional field
         )
     elif isinstance(message, ToolResponseMessage):
         out = OpenAIChatCompletionToolMessage(
             role="tool",
             tool_call_id=message.call_id,
-            content=await _convert_message_content(message.content),
+            content=await _convert_message_content(message.content),  # type: ignore[typeddict-item]  # content union type incompatible with TypedDict str requirement
         )
     elif isinstance(message, SystemMessage):
         out = OpenAIChatCompletionSystemMessage(
             role="system",
-            content=await _convert_message_content(message.content),
+            content=await _convert_message_content(message.content),  # type: ignore[typeddict-item]  # content union type incompatible with TypedDict str requirement
         )
     else:
         raise ValueError(f"Unsupported message type: {type(message)}")
@@ -758,16 +764,16 @@ def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict:
     function = out["function"]

     if isinstance(tool.tool_name, BuiltinTool):
-        function["name"] = tool.tool_name.value
+        function["name"] = tool.tool_name.value  # type: ignore[index]  # dict value inferred as Any but mypy sees Collection[str]
     else:
-        function["name"] = tool.tool_name
+        function["name"] = tool.tool_name  # type: ignore[index]  # dict value inferred as Any but mypy sees Collection[str]

     if tool.description:
-        function["description"] = tool.description
+        function["description"] = tool.description  # type: ignore[index]  # dict value inferred as Any but mypy sees Collection[str]

     if tool.input_schema:
         # Pass through the entire JSON Schema as-is
-        function["parameters"] = tool.input_schema
+        function["parameters"] = tool.input_schema  # type: ignore[index]  # dict value inferred as Any but mypy sees Collection[str]

     # NOTE: OpenAI does not support output_schema, so we drop it here
     # It's stored in LlamaStack for validation and other provider usage
@@ -815,15 +821,15 @@ def _convert_openai_request_tool_config(tool_choice: str | dict[str, Any] | None
     tool_config = ToolConfig()
     if tool_choice:
         try:
-            tool_choice = ToolChoice(tool_choice)
+            tool_choice = ToolChoice(tool_choice)  # type: ignore[assignment]  # reassigning to enum narrows union but mypy can't track after exception
         except ValueError:
             pass
-        tool_config.tool_choice = tool_choice
+        tool_config.tool_choice = tool_choice  # type: ignore[assignment]  # ToolConfig.tool_choice accepts Union[ToolChoice, dict] but mypy tracks narrower type
     return tool_config


 def _convert_openai_request_tools(tools: list[dict[str, Any]] | None = None) -> list[ToolDefinition]:
-    lls_tools = []
+    lls_tools: list[ToolDefinition] = []
     if not tools:
         return lls_tools
@@ -843,16 +849,16 @@ def _convert_openai_request_tools(tools: list[dict[str, Any]] | None = None) ->

 def _convert_openai_request_response_format(
-    response_format: OpenAIResponseFormatParam = None,
+    response_format: OpenAIResponseFormatParam | None = None,
 ):
     if not response_format:
         return None
     # response_format can be a dict or a pydantic model
-    response_format = dict(response_format)
-    if response_format.get("type", "") == "json_schema":
+    response_format_dict = dict(response_format)  # type: ignore[arg-type]  # OpenAIResponseFormatParam union needs dict conversion
+    if response_format_dict.get("type", "") == "json_schema":
         return JsonSchemaResponseFormat(
-            type="json_schema",
-            json_schema=response_format.get("json_schema", {}).get("schema", ""),
+            type="json_schema",  # type: ignore[arg-type]  # Literal["json_schema"] incompatible with expected type
+            json_schema=response_format_dict.get("json_schema", {}).get("schema", ""),
         )
     return None
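`_convert_openai_request_response_format` now copies the incoming union into `response_format_dict` instead of reassigning the parameter, so the parameter's declared type stays stable and the dict operations are checked against a concrete `dict`. Reduced to a standalone sketch with a generic mapping parameter:

    from collections.abc import Mapping
    from typing import Any

    def extract_json_schema(response_format: Mapping[str, Any] | None) -> Any:
        if not response_format:
            return None
        # Copy under a new name rather than reassigning the parameter,
        # so its declared type never changes mid-function.
        response_format_dict = dict(response_format)
        if response_format_dict.get("type", "") == "json_schema":
            return response_format_dict.get("json_schema", {}).get("schema", "")
        return None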
@@ -938,16 +944,15 @@ def _convert_openai_sampling_params(
     # Map an explicit temperature of 0 to greedy sampling
     if temperature == 0:
-        strategy = GreedySamplingStrategy()
+        sampling_params.strategy = GreedySamplingStrategy()
     else:
         # OpenAI defaults to 1.0 for temperature and top_p if unset
         if temperature is None:
             temperature = 1.0
         if top_p is None:
             top_p = 1.0
-        strategy = TopPSamplingStrategy(temperature=temperature, top_p=top_p)
+        sampling_params.strategy = TopPSamplingStrategy(temperature=temperature, top_p=top_p)  # type: ignore[assignment]  # SamplingParams.strategy union accepts this type

-    sampling_params.strategy = strategy
     return sampling_params
@@ -957,23 +962,24 @@ def openai_messages_to_messages(
     """
     Convert a list of OpenAIChatCompletionMessage into a list of Message.
     """
-    converted_messages = []
+    converted_messages: list[Message] = []
     for message in messages:
+        converted_message: Message
         if message.role == "system":
-            converted_message = SystemMessage(content=openai_content_to_content(message.content))
+            converted_message = SystemMessage(content=openai_content_to_content(message.content))  # type: ignore[arg-type]  # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
         elif message.role == "user":
-            converted_message = UserMessage(content=openai_content_to_content(message.content))
+            converted_message = UserMessage(content=openai_content_to_content(message.content))  # type: ignore[arg-type]  # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
         elif message.role == "assistant":
             converted_message = CompletionMessage(
-                content=openai_content_to_content(message.content),
-                tool_calls=_convert_openai_tool_calls(message.tool_calls),
+                content=openai_content_to_content(message.content),  # type: ignore[arg-type]  # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
+                tool_calls=_convert_openai_tool_calls(message.tool_calls) if message.tool_calls else [],  # type: ignore[arg-type]  # OpenAI tool_calls type incompatible with conversion function
                 stop_reason=StopReason.end_of_turn,
             )
         elif message.role == "tool":
             converted_message = ToolResponseMessage(
                 role="tool",
                 call_id=message.tool_call_id,
-                content=openai_content_to_content(message.content),
+                content=openai_content_to_content(message.content),  # type: ignore[arg-type]  # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
             )
         else:
             raise ValueError(f"Unknown role {message.role}")
@@ -990,9 +996,9 @@ def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionConten
         return [openai_content_to_content(c) for c in content]
     elif hasattr(content, "type"):
         if content.type == "text":
-            return TextContentItem(type="text", text=content.text)
+            return TextContentItem(type="text", text=content.text)  # type: ignore[attr-defined]  # Iterable narrowed by hasattr check but mypy doesn't track
         elif content.type == "image_url":
-            return ImageContentItem(type="image", image=_URLOrData(url=URL(uri=content.image_url.url)))
+            return ImageContentItem(type="image", image=_URLOrData(url=URL(uri=content.image_url.url)))  # type: ignore[attr-defined]  # Iterable narrowed by hasattr check but mypy doesn't track
         else:
             raise ValueError(f"Unknown content type: {content.type}")
     else:
@@ -1041,9 +1047,9 @@ def convert_openai_chat_completion_choice(
         completion_message=CompletionMessage(
             content=choice.message.content or "",  # CompletionMessage content is not optional
             stop_reason=_convert_openai_finish_reason(choice.finish_reason),
-            tool_calls=_convert_openai_tool_calls(choice.message.tool_calls),
+            tool_calls=_convert_openai_tool_calls(choice.message.tool_calls) if choice.message.tool_calls else [],  # type: ignore[arg-type]  # OpenAI tool_calls Optional type broadens union
         ),
-        logprobs=_convert_openai_logprobs(getattr(choice, "logprobs", None)),
+        logprobs=_convert_openai_logprobs(getattr(choice, "logprobs", None)),  # type: ignore[arg-type]  # getattr returns Any, can't narrow without inspection
     )
@@ -1070,7 +1076,7 @@ async def convert_openai_chat_completion_stream(
         choice = chunk.choices[0]  # assuming only one choice per chunk

         # we assume there's only one finish_reason in the stream
-        stop_reason = _convert_openai_finish_reason(choice.finish_reason) or stop_reason
+        stop_reason = _convert_openai_finish_reason(choice.finish_reason) if choice.finish_reason else stop_reason
         logprobs = getattr(choice, "logprobs", None)

         # if there's a tool call, emit an event for each tool in the list
@@ -1083,7 +1089,7 @@ async def convert_openai_chat_completion_stream(
                     event=ChatCompletionResponseEvent(
                         event_type=event_type,
                         delta=TextDelta(text=choice.delta.content),
-                        logprobs=_convert_openai_logprobs(logprobs),
+                        logprobs=_convert_openai_logprobs(logprobs),  # type: ignore[arg-type]  # logprobs type broadened from getattr result
                     )
                 )
@@ -1101,10 +1107,10 @@ async def convert_openai_chat_completion_stream(
                         event=ChatCompletionResponseEvent(
                             event_type=event_type,
                             delta=ToolCallDelta(
-                                tool_call=_convert_openai_tool_calls([tool_call])[0],
+                                tool_call=_convert_openai_tool_calls([tool_call])[0],  # type: ignore[arg-type, list-item]  # delta tool_call type differs from complete tool_call
                                 parse_status=ToolCallParseStatus.succeeded,
                             ),
-                            logprobs=_convert_openai_logprobs(logprobs),
+                            logprobs=_convert_openai_logprobs(logprobs),  # type: ignore[arg-type]  # logprobs type broadened from getattr result
                         )
                     )
                 else:
@@ -1125,12 +1131,15 @@ async def convert_openai_chat_completion_stream(
                     if tool_call.function.name:
                         buffer["name"] = tool_call.function.name
                         delta = f"{buffer['name']}("
-                        buffer["content"] += delta
+                        if buffer["content"] is not None:
+                            buffer["content"] += delta

                     if tool_call.function.arguments:
                         delta = tool_call.function.arguments
-                        buffer["arguments"] += delta
-                        buffer["content"] += delta
+                        if buffer["arguments"] is not None and delta:
+                            buffer["arguments"] += delta
+                        if buffer["content"] is not None and delta:
+                            buffer["content"] += delta

                     yield ChatCompletionResponseStreamChunk(
                         event=ChatCompletionResponseEvent(
@@ -1139,7 +1148,7 @@ async def convert_openai_chat_completion_stream(
                                 tool_call=delta,
                                 parse_status=ToolCallParseStatus.in_progress,
                             ),
-                            logprobs=_convert_openai_logprobs(logprobs),
+                            logprobs=_convert_openai_logprobs(logprobs),  # type: ignore[arg-type]  # logprobs type broadened from getattr result
                         )
                     )
             elif choice.delta.content:
@@ -1147,7 +1156,7 @@ async def convert_openai_chat_completion_stream(
                     event=ChatCompletionResponseEvent(
                         event_type=event_type,
                         delta=TextDelta(text=choice.delta.content or ""),
-                        logprobs=_convert_openai_logprobs(logprobs),
+                        logprobs=_convert_openai_logprobs(logprobs),  # type: ignore[arg-type]  # logprobs type broadened from getattr result
                     )
                 )

@@ -1155,7 +1164,8 @@ async def convert_openai_chat_completion_stream(
         logger.debug(f"toolcall_buffer[{idx}]: {buffer}")
         if buffer["name"]:
             delta = ")"
-            buffer["content"] += delta
+            if buffer["content"] is not None:
+                buffer["content"] += delta
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
                     event_type=event_type,
@@ -1168,16 +1178,16 @@ async def convert_openai_chat_completion_stream(
             )

         try:
-            tool_call = ToolCall(
-                call_id=buffer["call_id"],
-                tool_name=buffer["name"],
-                arguments=buffer["arguments"],
+            parsed_tool_call = ToolCall(
+                call_id=buffer["call_id"] or "",
+                tool_name=buffer["name"] or "",
+                arguments=buffer["arguments"] or "",
             )
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
                     event_type=ChatCompletionResponseEventType.progress,
                     delta=ToolCallDelta(
-                        tool_call=tool_call,
+                        tool_call=parsed_tool_call,  # type: ignore[arg-type]  # ToolCallDelta.tool_call accepts Union[str, ToolCall]
                         parse_status=ToolCallParseStatus.succeeded,
                     ),
                     stop_reason=stop_reason,
@@ -1189,7 +1199,7 @@ async def convert_openai_chat_completion_stream(
                 event=ChatCompletionResponseEvent(
event_type=ChatCompletionResponseEventType.progress, event_type=ChatCompletionResponseEventType.progress,
delta=ToolCallDelta( delta=ToolCallDelta(
tool_call=buffer["content"], tool_call=buffer["content"], # type: ignore[arg-type] # ToolCallDelta.tool_call accepts Union[str, ToolCall]
parse_status=ToolCallParseStatus.failed, parse_status=ToolCallParseStatus.failed,
), ),
stop_reason=stop_reason, stop_reason=stop_reason,
@ -1250,7 +1260,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
top_p: float | None = None, top_p: float | None = None,
user: str | None = None, user: str | None = None,
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
messages = openai_messages_to_messages(messages) messages = openai_messages_to_messages(messages) # type: ignore[assignment] # converted from OpenAI to LlamaStack message format
response_format = _convert_openai_request_response_format(response_format) response_format = _convert_openai_request_response_format(response_format)
sampling_params = _convert_openai_sampling_params( sampling_params = _convert_openai_sampling_params(
max_tokens=max_tokens, max_tokens=max_tokens,
@ -1259,15 +1269,15 @@ class OpenAIChatCompletionToLlamaStackMixin:
) )
tool_config = _convert_openai_request_tool_config(tool_choice) tool_config = _convert_openai_request_tool_config(tool_choice)
tools = _convert_openai_request_tools(tools) tools = _convert_openai_request_tools(tools) # type: ignore[assignment] # converted from OpenAI to LlamaStack tool format
if tool_config.tool_choice == ToolChoice.none: if tool_config.tool_choice == ToolChoice.none:
tools = [] tools = [] # type: ignore[assignment] # empty list narrows return type but mypy tracks broader type
outstanding_responses = [] outstanding_responses = []
# "n" is the number of completions to generate per prompt # "n" is the number of completions to generate per prompt
n = n or 1 n = n or 1
for _i in range(0, n): for _i in range(0, n):
response = self.chat_completion( response = self.chat_completion( # type: ignore[attr-defined] # mixin expects class to implement chat_completion
model_id=model, model_id=model,
messages=messages, messages=messages,
sampling_params=sampling_params, sampling_params=sampling_params,
@ -1279,7 +1289,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
outstanding_responses.append(response) outstanding_responses.append(response)
if stream: if stream:
return OpenAIChatCompletionToLlamaStackMixin._process_stream_response(self, model, outstanding_responses) return OpenAIChatCompletionToLlamaStackMixin._process_stream_response(self, model, outstanding_responses) # type: ignore[no-any-return] # mixin async generator return type too complex for mypy
return await OpenAIChatCompletionToLlamaStackMixin._process_non_stream_response( return await OpenAIChatCompletionToLlamaStackMixin._process_non_stream_response(
self, model, outstanding_responses self, model, outstanding_responses
@ -1295,14 +1305,16 @@ class OpenAIChatCompletionToLlamaStackMixin:
response = await outstanding_response response = await outstanding_response
async for chunk in response: async for chunk in response:
event = chunk.event event = chunk.event
finish_reason = _convert_stop_reason_to_openai_finish_reason(event.stop_reason) finish_reason = (
_convert_stop_reason_to_openai_finish_reason(event.stop_reason) if event.stop_reason else None
)
if isinstance(event.delta, TextDelta): if isinstance(event.delta, TextDelta):
text_delta = event.delta.text text_delta = event.delta.text
delta = OpenAIChoiceDelta(content=text_delta) delta = OpenAIChoiceDelta(content=text_delta)
yield OpenAIChatCompletionChunk( yield OpenAIChatCompletionChunk(
id=id, id=id,
choices=[OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)], choices=[OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)], # type: ignore[arg-type] # finish_reason Optional[str] incompatible with Literal union
created=int(time.time()), created=int(time.time()),
model=model, model=model,
object="chat.completion.chunk", object="chat.completion.chunk",
@ -1310,13 +1322,17 @@ class OpenAIChatCompletionToLlamaStackMixin:
elif isinstance(event.delta, ToolCallDelta): elif isinstance(event.delta, ToolCallDelta):
if event.delta.parse_status == ToolCallParseStatus.succeeded: if event.delta.parse_status == ToolCallParseStatus.succeeded:
tool_call = event.delta.tool_call tool_call = event.delta.tool_call
if isinstance(tool_call, str):
continue
# First chunk includes full structure # First chunk includes full structure
openai_tool_call = OpenAIChoiceDeltaToolCall( openai_tool_call = OpenAIChoiceDeltaToolCall(
index=0, index=0,
id=tool_call.call_id, id=tool_call.call_id,
function=OpenAIChoiceDeltaToolCallFunction( function=OpenAIChoiceDeltaToolCallFunction(
name=tool_call.tool_name, name=tool_call.tool_name
if isinstance(tool_call.tool_name, str)
else tool_call.tool_name.value, # type: ignore[arg-type] # enum .value extraction on Union confuses mypy
arguments="", arguments="",
), ),
) )
@ -1324,7 +1340,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
yield OpenAIChatCompletionChunk( yield OpenAIChatCompletionChunk(
id=id, id=id,
choices=[ choices=[
OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) # type: ignore[arg-type] # finish_reason Optional[str] incompatible with Literal union
], ],
created=int(time.time()), created=int(time.time()),
model=model, model=model,
@ -1341,7 +1357,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
yield OpenAIChatCompletionChunk( yield OpenAIChatCompletionChunk(
id=id, id=id,
choices=[ choices=[
OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) # type: ignore[arg-type] # finish_reason Optional[str] incompatible with Literal union
], ],
created=int(time.time()), created=int(time.time()),
model=model, model=model,
@ -1351,7 +1367,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
async def _process_non_stream_response( async def _process_non_stream_response(
self, model: str, outstanding_responses: list[Awaitable[ChatCompletionResponse]] self, model: str, outstanding_responses: list[Awaitable[ChatCompletionResponse]]
) -> OpenAIChatCompletion: ) -> OpenAIChatCompletion:
choices = [] choices: list[OpenAIChatCompletionChoice] = []
for outstanding_response in outstanding_responses: for outstanding_response in outstanding_responses:
response = await outstanding_response response = await outstanding_response
completion_message = response.completion_message completion_message = response.completion_message
@ -1360,14 +1376,14 @@ class OpenAIChatCompletionToLlamaStackMixin:
choice = OpenAIChatCompletionChoice( choice = OpenAIChatCompletionChoice(
index=len(choices), index=len(choices),
message=message, message=message, # type: ignore[arg-type] # OpenAIChatCompletionMessage union incompatible with narrower Message type
finish_reason=finish_reason, finish_reason=finish_reason,
) )
choices.append(choice) choices.append(choice) # type: ignore[arg-type] # OpenAIChatCompletionChoice type annotation mismatch
return OpenAIChatCompletion( return OpenAIChatCompletion(
id=f"chatcmpl-{uuid.uuid4()}", id=f"chatcmpl-{uuid.uuid4()}",
choices=choices, choices=choices, # type: ignore[arg-type] # list[OpenAIChatCompletionChoice] union incompatible
created=int(time.time()), created=int(time.time()),
model=model, model=model,
object="chat.completion", object="chat.completion",
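For illustration, a minimal, self-contained sketch of the None-safe buffering pattern these guards introduce; the `deltas` shape below is a hypothetical stand-in for streamed OpenAI tool-call chunks, not the repository's actual types.

from typing import TypedDict


class ToolCallBuffer(TypedDict):
    call_id: str | None
    name: str | None
    arguments: str | None
    content: str | None


def accumulate(deltas: list[dict[str, str | None]]) -> ToolCallBuffer:
    # Fields start as None or empty strings and may only be extended after an
    # explicit None check, which is exactly what mypy enforces in the diff above.
    buffer: ToolCallBuffer = {"call_id": None, "name": None, "arguments": "", "content": ""}
    for d in deltas:
        if d.get("id"):
            buffer["call_id"] = d["id"]
        name = d.get("name")
        if name:
            buffer["name"] = name
            if buffer["content"] is not None:
                buffer["content"] += f"{name}("
        args = d.get("arguments")
        if args:
            if buffer["arguments"] is not None:
                buffer["arguments"] += args
            if buffer["content"] is not None:
                buffer["content"] += args
    if buffer["name"] and buffer["content"] is not None:
        buffer["content"] += ")"
    return buffer


print(accumulate([{"id": "call_1", "name": "get_weather"}, {"arguments": '{"city": "Paris"}'}]))
# content accumulates to 'get_weather({"city": "Paris"})'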

View file

@ -196,6 +196,7 @@ def make_overlapped_chunks(
chunks.append( chunks.append(
Chunk( Chunk(
content=chunk, content=chunk,
chunk_id=chunk_id,
metadata=chunk_metadata, metadata=chunk_metadata,
chunk_metadata=backend_chunk_metadata, chunk_metadata=backend_chunk_metadata,
) )
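A hedged sketch of the constructor shape this change produces: the chunk_id is now passed explicitly at construction time. The dataclass below is an illustrative stand-in for the real Chunk model, and the chunk_id value is made up.

from dataclasses import dataclass, field


@dataclass
class Chunk:  # illustrative stand-in, not llama_stack's Chunk
    content: str
    chunk_id: str  # now supplied explicitly instead of derived later
    metadata: dict = field(default_factory=dict)
    chunk_metadata: dict | None = None


c = Chunk(content="Llama 4 Maverick has 128 experts", chunk_id="file-861837565219:0")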

View file

@@ -430,6 +430,32 @@ def _unwrap_generic_list(typ: type[list[T]]) -> type[T]:
    return list_type  # type: ignore[no-any-return]


+def is_generic_sequence(typ: object) -> bool:
+    "True if the specified type is a generic Sequence, i.e. `Sequence[T]`."
+    import collections.abc
+
+    typ = unwrap_annotated_type(typ)
+    return typing.get_origin(typ) is collections.abc.Sequence
+
+
+def unwrap_generic_sequence(typ: object) -> type:
+    """
+    Extracts the item type of a Sequence type.
+
+    :param typ: The Sequence type `Sequence[T]`.
+    :returns: The item type `T`.
+    """
+
+    return rewrap_annotated_type(_unwrap_generic_sequence, typ)  # type: ignore[arg-type]
+
+
+def _unwrap_generic_sequence(typ: object) -> type:
+    "Extracts the item type of a Sequence type (e.g. returns `T` for `Sequence[T]`)."
+    (sequence_type,) = typing.get_args(typ)  # unpack single tuple element
+    return sequence_type  # type: ignore[no-any-return]
+
+
def is_generic_set(typ: object) -> TypeGuard[type[set]]:
    "True if the specified type is a generic set, i.e. `Set[T]`."

View file

@@ -18,10 +18,12 @@ from .inspection import (
    TypeLike,
    is_generic_dict,
    is_generic_list,
+    is_generic_sequence,
    is_type_optional,
    is_type_union,
    unwrap_generic_dict,
    unwrap_generic_list,
+    unwrap_generic_sequence,
    unwrap_optional_type,
    unwrap_union_types,
)
@@ -155,24 +157,28 @@ def python_type_to_name(data_type: TypeLike, force: bool = False) -> str:
    if metadata is not None:
        # type is Annotated[T, ...]
        arg = typing.get_args(data_type)[0]
-        return python_type_to_name(arg)
+        return python_type_to_name(arg, force=force)

    if force:
        # generic types
        if is_type_optional(data_type, strict=True):
-            inner_name = python_type_to_name(unwrap_optional_type(data_type))
+            inner_name = python_type_to_name(unwrap_optional_type(data_type), force=True)
            return f"Optional__{inner_name}"
        elif is_generic_list(data_type):
-            item_name = python_type_to_name(unwrap_generic_list(data_type))
+            item_name = python_type_to_name(unwrap_generic_list(data_type), force=True)
+            return f"List__{item_name}"
+        elif is_generic_sequence(data_type):
+            # Treat Sequence the same as List for schema generation purposes
+            item_name = python_type_to_name(unwrap_generic_sequence(data_type), force=True)
            return f"List__{item_name}"
        elif is_generic_dict(data_type):
            key_type, value_type = unwrap_generic_dict(data_type)
-            key_name = python_type_to_name(key_type)
-            value_name = python_type_to_name(value_type)
+            key_name = python_type_to_name(key_type, force=True)
+            value_name = python_type_to_name(value_type, force=True)
            return f"Dict__{key_name}__{value_name}"
        elif is_type_union(data_type):
            member_types = unwrap_union_types(data_type)
-            member_names = "__".join(python_type_to_name(member_type) for member_type in member_types)
+            member_names = "__".join(python_type_to_name(member_type, force=True) for member_type in member_types)
            return f"Union__{member_names}"

    # named system or user-defined type
View file

@ -111,7 +111,7 @@ def get_class_property_docstrings(
def docstring_to_schema(data_type: type) -> Schema: def docstring_to_schema(data_type: type) -> Schema:
short_description, long_description = get_class_docstrings(data_type) short_description, long_description = get_class_docstrings(data_type)
schema: Schema = { schema: Schema = {
"title": python_type_to_name(data_type), "title": python_type_to_name(data_type, force=True),
} }
description = "\n".join(filter(None, [short_description, long_description])) description = "\n".join(filter(None, [short_description, long_description]))
@ -417,6 +417,10 @@ class JsonSchemaGenerator:
if origin_type is list: if origin_type is list:
(list_type,) = typing.get_args(typ) # unpack single tuple element (list_type,) = typing.get_args(typ) # unpack single tuple element
return {"type": "array", "items": self.type_to_schema(list_type)} return {"type": "array", "items": self.type_to_schema(list_type)}
elif origin_type is collections.abc.Sequence:
# Treat Sequence the same as list for JSON schema (both are arrays)
(sequence_type,) = typing.get_args(typ) # unpack single tuple element
return {"type": "array", "items": self.type_to_schema(sequence_type)}
elif origin_type is dict: elif origin_type is dict:
key_type, value_type = typing.get_args(typ) key_type, value_type = typing.get_args(typ)
if not (key_type is str or key_type is int or is_type_enum(key_type)): if not (key_type is str or key_type is int or is_type_enum(key_type)):
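A standalone re-derivation of the array schema the new branch emits, assuming only str/int item types for brevity; JsonSchemaGenerator itself is not invoked.

import collections.abc
import typing
from collections.abc import Sequence


def sequence_schema(typ: object) -> dict:
    assert typing.get_origin(typ) is collections.abc.Sequence
    (item,) = typing.get_args(typ)
    # Sequence and list both map to a JSON array schema.
    return {"type": "array", "items": {"type": {str: "string", int: "integer"}[item]}}


assert sequence_schema(Sequence[str]) == {"type": "array", "items": {"type": "string"}}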

View file

@@ -39,7 +39,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
    if provider.provider_type in (
        "inline::meta-reference",
        "inline::sentence-transformers",
-        "inline::vllm",
+        "remote::vllm",
        "remote::bedrock",
        "remote::databricks",
        # Technically Nvidia does support OpenAI completions, but none of their hosted models
@@ -120,7 +120,7 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
    if provider.provider_type in (
        "inline::meta-reference",
        "inline::sentence-transformers",
-        "inline::vllm",
+        "remote::vllm",
        "remote::bedrock",
        "remote::databricks",
        "remote::cerebras",

View file

@@ -0,0 +1,763 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_UKFNZA0eSkL6fZHbs8ygBd5W",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_UKFNZA0eSkL6fZHbs8ygBd5W",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-861837565219, score: 0.015252742239920682, attributes: {'filename': 'test_response_non_streaming_file_search.txt', 'chunk_id': '869ae0c0-ab85-ca6f-e5d0-024381443c27', 'document_id': 'file-861837565219', 'token_count': 10.0, 'metadata_token_count': 13.0} (cite as <|file-861837565219|>)\nLlama 4 Maverick has 128 experts\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "OEZj77MujzEilF"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "aZ37vwWHFrpGy"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "csghpwq82thpEG"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "1dRxATyjFkzZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "DkAEGxNVXrhL9KJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "SI7v0ofTi6JL0LP"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "tThgm0YItJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "5UnIV9ZM2koPE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "pFPs5HfBSA"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CIT42IHpAEgx"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "jpXixTaXlYSxTu3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "IBEKia6bwNtLB"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "hHMPPr4Q"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "iGTIWlxj9c2Equ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "wfQImUZLNC8Dtgc"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "m21wFuqSLpMN"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CP5N1QxHqEnzbnq"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "861",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "jgQZ9egEpAiQv"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "837",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "viNedPoe13lJJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "565",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "j2gGBSzOagN98"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "219",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "d4iMNITon2xM3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "67lYY4LnZsfKd3U"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "bMllpJPicr01Ip"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "ZgWEFMbo3w"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 23,
"prompt_tokens": 352,
"total_tokens": 375,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "Wwt10anxWJDla"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
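These recordings can be replayed by rebuilding each chunk from its __data__ payload and concatenating the streamed deltas. A hedged sketch follows: the file name is hypothetical, and extra keys such as "obfuscation" are tolerated because the OpenAI SDK models allow extra fields.

import json

from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

with open("recording.json") as f:  # hypothetical path to a fixture like the one above
    recording = json.load(f)

text = ""
for entry in recording["response"]["body"]:
    chunk = ChatCompletionChunk.model_validate(entry["__data__"])
    if chunk.choices and chunk.choices[0].delta.content:
        text += chunk.choices[0].delta.content

print(text)  # "The Llama 4 Maverick model has 128 experts <|file-861837565219|>."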

View file

@@ -0,0 +1,767 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts_pdf]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_M8gyYiB39MwYdJKc4aHIGbfA",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_M8gyYiB39MwYdJKc4aHIGbfA",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 2 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-379221123213, score: 0.008294223715346738, attributes: {'filename': 'llama_stack_and_models.pdf', 'chunk_id': 'c3556aea-3b73-0278-aa16-ebbdb4c18b18', 'document_id': 'file-379221123213', 'token_count': 98.0, 'metadata_token_count': 11.0} (cite as <|file-379221123213|>)\n, \nhardware\n \nvendors,\n \nand\n \nAI-focused\n \ncompanies)\n \nthat\n \noffer\n \ntailored\n \ninfrastructure,\n \nsoftware,\n \nand\n \nservices\n \nfor\n \ndeploying\n \nLlama\n \nmodels.\n \nLlama 4 Maverick \n Llama 4 Maverick is a Mixture-of-Experts (MoE) model with 17 billion active parameters and 128 experts. \n"
},
{
"type": "text",
"text": "[2] document_id: file-379221123213, score: 0.0033899213359898477, attributes: {'filename': 'llama_stack_and_models.pdf', 'chunk_id': '16d99c69-8323-27ce-3bd7-7b51dcac2735', 'document_id': 'file-379221123213', 'token_count': 498.0, 'metadata_token_count': 11.0} (cite as <|file-379221123213|>)\nLlama Stack \nLlama Stack Overview \nLlama Stack standardizes the core building blocks that simplify AI application development. It codifies best \npractices\n \nacross\n \nthe\n \nLlama\n \necosystem.\n \nMore\n \nspecifically,\n \nit\n \nprovides\n \u25cf Unified API layer for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry. \u25cf Plugin architecture to support the rich ecosystem of different API implementations in various \nenvironments,\n \nincluding\n \nlocal\n \ndevelopment,\n \non-premises,\n \ncloud,\n \nand\n \nmobile.\n \u25cf Prepackaged verified distributions which offer a one-stop solution for developers to get started quickly \nand\n \nreliably\n \nin\n \nany\n \nenvironment.\n \u25cf Multiple developer interfaces like CLI and SDKs for Python, Typescript, iOS, and Android. \u25cf Standalone applications as examples for how to build production-grade AI applications with Llama \nStack.\n \nLlama Stack Benefits \n\u25cf Flexible Options: Developers can choose their preferred infrastructure without changing APIs and enjoy \nflexible\n \ndeployment\n \nchoices.\n \u25cf Consistent Experience: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI \napplications\n \nwith\n \nconsistent\n \napplication\n \nbehavior.\n \u25cf Robust Ecosystem: Llama Stack is already integrated with distribution partners (cloud providers, \nhardware\n \nvendors,\n \nand\n \nAI-focused\n \ncompanies)\n \nthat\n \noffer\n \ntailored\n \ninfrastructure,\n \nsoftware,\n \nand\n \nservices\n \nfor\n \ndeploying\n \nLlama\n \nmodels.\n \nLlama 4 Maverick \n Llama 4 Maverick is a Mixture-of-Experts (MoE) model with 17 billion active parameters and 128 experts. \n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "SH6nRcfXzd8qPg"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "vbJu1mhpQKtNr"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "uAUiYAVpMW8Ph9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "DJxjs1HFugOD"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "sU2IncrauGmuYki"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "IkZbrWS45cqkmqi"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "YbZYhGgoGE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "7FtHnapGtkc09"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "8P3mUr7HfV"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "WxYXJUfkyxqZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "E9hIXNC7oeJcZ8v"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "L9ww7cI1pSSt3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "hHao5x7a"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "0cwygEJttBgv7M"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "KYVCnE5AA6MnQ0Y"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "N3DcYBcrQDzD"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CUpjI7Qo17k4aeo"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "379",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "s1694CAHwowUf"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "221",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "I94vCKkpQNsx6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "123",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "RNfAfPtJK3KHE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "213",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Gk04vo9RXpl3P"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "rkWPIUdNABAeP7V"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "GIF1vPXxInWrhl"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Oa1imYdRme"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 23,
"prompt_tokens": 1048,
"total_tokens": 1071,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "0Xx3txQF13S"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,925 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_FzhOmTdZThRndI5rSASPdAqr",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_FzhOmTdZThRndI5rSASPdAqr",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-797509666839, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-797509666839', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-797509666839|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Lk9Xf7hCFPS2tT"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "14pQ6XFvX7eSh"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "gPEg73EpAxR3FC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "ZWJl6Mzcv95d"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "zEYaSNtwtGmhfwy"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "2tesGAvAkEOb8T6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Hykn5kSQlG"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "xWW13SGjSybVX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "fAZjisJ63a"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "FlTpZNfFG6rX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "9J9VrtXuLHug6II"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "0EckZGr823mA9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "dW7O5HFR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " in",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "5dRdaDvaXumkV"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " its",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "kD1aZsGwZhMx"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " mixture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "IpxDJF0p"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "WbnOG310xKaLq"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "sh58U2d8"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " architecture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "El3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "u3EtYZFJGaheZj"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "QjdqqIuk8c7wMUp"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Zqcwf53n0hUw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "DfFLPM5V45QUiAm"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "797",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "55snCUEJgoLyX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "509",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "pCqEKhy1wq8Vl"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "666",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "c5QnCsKzuhFd0"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "839",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "jFSbryUeH7ZyA"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "uHktQBYsC92laeK"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "UUxHP1QGdz8MdR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "uExxZzWuXd"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 29,
"prompt_tokens": 359,
"total_tokens": 388,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "EjpA6XzHVgcj8"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
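
The recording above ends with the canonical streaming shape: per-token content deltas, a chunk whose finish_reason is "stop", and, because the request sets stream_options.include_usage, a final chunk with an empty choices list that carries the usage totals. Below is a minimal sketch of folding such a stream back into a message, assuming the OpenAI Python client's ChatCompletionChunk type; the helper name and its caller are hypothetical and not part of these fixtures.

from collections.abc import Iterable

from openai.types.chat import ChatCompletionChunk


def accumulate_stream(chunks: Iterable[ChatCompletionChunk]):
    """Collect delta content, the finish reason, and the trailing usage chunk."""
    content_parts: list[str] = []
    finish_reason = None
    usage = None
    for chunk in chunks:
        # With stream_options.include_usage=True, the last chunk has an empty
        # choices list and a non-null usage field, as in the recording above.
        if chunk.usage is not None:
            usage = chunk.usage
        for choice in chunk.choices:
            if choice.delta.content:
                content_parts.append(choice.delta.content)
            if choice.finish_reason:
                finish_reason = choice.finish_reason
    return "".join(content_parts), finish_reason, usage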

View file

@ -0,0 +1,631 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_FzhOmTdZThRndI5rSASPdAqr",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_FzhOmTdZThRndI5rSASPdAqr",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-797509666839, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-797509666839', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-797509666839|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
},
{
"role": "assistant",
"content": "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture <|file-797509666839|>."
},
{
"role": "user",
"content": "Can you tell me more about the architecture?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_y4Py1L2VscRQ5IBZ7gGpqpWv",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "iFdF"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "gIC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "P"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "p"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "L",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "TAVud"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "lama",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " ",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "hHmE5"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "4",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CN4uS"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " Maver",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ick",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "0kI"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " model",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " architecture",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "dyryTBF49"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "BHV"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "qrKh"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 22,
"prompt_tokens": 404,
"total_tokens": 426,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "ecpBTD3qjc75r"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
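
This recording streams a tool call rather than content: the opening delta carries the call id and function name, subsequent deltas append fragments of the JSON arguments ("{\"", "query", "\":\"", ...), and the stream closes with finish_reason "tool_calls" before the usage-only chunk. Below is a minimal sketch of reassembling those fragments, again assuming the OpenAI Python client's chunk types; the helper is hypothetical, not part of the fixtures.

import json


def accumulate_tool_call(chunks):
    """Merge streamed tool_call deltas into (call_id, name, parsed arguments)."""
    call_id, name, argument_parts = None, None, []
    for chunk in chunks:
        for choice in chunk.choices:
            # Only the first delta carries id/name; later ones carry fragments.
            for tool_call in choice.delta.tool_calls or []:
                call_id = tool_call.id or call_id
                if tool_call.function:
                    name = tool_call.function.name or name
                    if tool_call.function.arguments:
                        argument_parts.append(tool_call.function.arguments)
    # The concatenated fragments form a complete JSON object, e.g.
    # {"query":"Llama 4 Maverick model architecture"}.
    return call_id, name, json.loads("".join(argument_parts))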

View file

@ -0,0 +1,763 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_gZXRKN1HMDC16NP9wNPAkP9K",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model experts count\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_gZXRKN1HMDC16NP9wNPAkP9K",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-864460993305, score: 0.011418752464355166, attributes: {'filename': 'test_response_non_streaming_file_search.txt', 'chunk_id': '869ae0c0-ab85-ca6f-e5d0-024381443c27', 'document_id': 'file-864460993305', 'token_count': 10.0, 'metadata_token_count': 13.0} (cite as <|file-864460993305|>)\nLlama 4 Maverick has 128 experts\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model experts count\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "VvS2zeV5Z8apdX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "NeElmbFuPxg9F"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "RA2Dv6fH3Xp28d"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "mk2wpBSl9esL"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "WkghQrNy7WNFz7S"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "LOo1ya1Av8yejuX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Uj02OVTEBb"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "7s3FiwwwgzGhy"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "WExrPT6Yjd"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "vbf0YwoBbJsB"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "vYIgV2n0AuxwZ9F"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "lAS4gXrK4sNoq"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "90lGUcaB"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "mnFZfKgXWsjWZe"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "eOcwjhvK0vIp2nj"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "5TijFZHKoeGs"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "MWGjx7wiu4tdFha"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "864",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "k9VH32AhyY519"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "460",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "dWxZtp4i8KhxZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "993",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "u2WHjDkGJE2hg"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "305",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "6fckZytfB9iS5"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "YGOP75uha3KyHao"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "emmym2mGHhvw9Q"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "GoEMFfNFBW"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 23,
"prompt_tokens": 350,
"total_tokens": 373,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "ec6S325i8izl1"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,631 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-528246887823, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-528246887823', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-528246887823|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
},
{
"role": "assistant",
"content": "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture <|file-528246887823|>."
},
{
"role": "user",
"content": "Can you tell me more about the architecture?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_2dn6pQIic4tAhxL0Q3R9v9oy",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "U5u2"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "rC6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "4"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "E"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "L",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "U1RKZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "lama",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "N9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " ",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "eCM84"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "4",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "RNtZo"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " Maver",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ick",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "OmQ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " model",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " architecture",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Hd8hPZl2u"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "5bs"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "eMIj"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 22,
"prompt_tokens": 404,
"total_tokens": 426,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "ofat2LchRvz8V"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,925 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-528246887823, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-528246887823', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-528246887823|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "DzrEfuLOuw4cnb"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CsVsWYnTMLfCu"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "45hLla9Dhdu3x9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "AhCUnf7tqKqC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "gvAEwnHAgMzITVb"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "mGUFWICkd1S0jlx"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "e85JCyNVPe"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "5vQf0h4IJTGGt"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "anovsNqaSC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "fS6GYg8pBO8Q"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "vO7onsnvWf5kjUI"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "pdFjXciA0pN5w"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "eMMaKcAW"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " in",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "JFDRUy7B9ktO0"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " its",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "QlQIiohVPMVQ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " mixture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "UuR2QmMR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "t0uvHdtkB4Fsl"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "3G1KX2gw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " architecture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "x2J"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "fbLYZDlS7xvywf"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "vAxoGpf245DPeM8"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "gLu1ZShAlH4C"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "PdMvc8X2LtbhyFU"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "528",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "0S00nwBZD0Cah"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "246",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "fa7s8AYzHjMph"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "887",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "hrwMBgH8bsKYT"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "823",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "NBJ8yJWJjBCCQ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "AAzbONdy9ExzSBR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "THiCsk4cqjABWJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "rzm64SnHTE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 29,
"prompt_tokens": 359,
"total_tokens": 388,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "AnUv1BxAB2uOY"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,952 @@
{
"test_id": "tests/integration/responses/test_file_search.py::test_response_file_search_filter_compound_and[client_with_models-txt=openai/gpt-4o]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "What are the engineering updates from the US?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_rST37XuKuJQcEBfmoTnNQzNe",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"engineering updates from the US\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_rST37XuKuJQcEBfmoTnNQzNe",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-710505118847, score: 0.005345607610573921, attributes: {'region': 'us', 'category': 'engineering', 'date': 1680307200.0, 'filename': 'us_engineering_q2.txt', 'chunk_id': '084e15ad-480a-eae8-9242-391c53854867', 'document_id': 'file-710505118847', 'token_count': 18.0, 'metadata_token_count': 32.0} (cite as <|file-710505118847|>)\nUS technical updates for Q2 2023. New features deployed in the US region.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"engineering updates from the US\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CVT4TMzBPNlTqA"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Rlj8tcP3E7bOB"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " engineering",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "8lga"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " updates",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "6fwO0WkR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " from",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "BryajibrQvv"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "iTlMgikEguMP"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " US",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "79xbcCa6na7en"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " include",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "q7q4AkjT"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " new",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "fiyvaDyv5eet"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " features",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "cBkhZfR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " deployed",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "EaW5Ixt"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " in",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "xLVfGMTiR4OMS"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "cncqZQApoIjH"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " region",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "yiSqVtnqF"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " for",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "sbDWGbV8OoYi"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " Q",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "E1ZJCGd5c2IH7b"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "2",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "agHXieAbH98A2VE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Ht3DkQwQs7t32Aw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "202",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "j4r88Vvqcm7VY"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "3",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "pv9GLKOSpa0BHEr"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "iBXT8JWz9X1J1q"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "D1gi2w0f0DN5n3k"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "zxHM3I5wmPGU"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Gl7oL62eU6xIrUp"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "710",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "l4RX4sx1BfQA6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "505",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "AGyEWqU2sDL6e"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "118",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "BReQxn8kTEiA5"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "847",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "yN9PEtunpAkNv"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "bKBLmRBkxlk61fP"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "077BDwQit7hWfz"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "LOYztD3Yfb"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 30,
"prompt_tokens": 364,
"total_tokens": 394,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "9lHtlsx9YsVH6"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -82,23 +82,37 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode
 @pytest.fixture(scope="session")
 def sample_chunks():
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
+    chunks_data = [
+        (
+            "Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
+            "doc1",
+            "programming",
+        ),
+        (
+            "Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
+            "doc2",
+            "ai",
+        ),
+        (
+            "Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
+            "doc3",
+            "computer_science",
+        ),
+        (
+            "Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
+            "doc4",
+            "ai",
+        ),
+    ]
+
     return [
         Chunk(
-            content="Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
-            metadata={"document_id": "doc1", "topic": "programming"},
-        ),
-        Chunk(
-            content="Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
-            metadata={"document_id": "doc2", "topic": "ai"},
-        ),
-        Chunk(
-            content="Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
-            metadata={"document_id": "doc3", "topic": "computer_science"},
-        ),
-        Chunk(
-            content="Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
-            metadata={"document_id": "doc4", "topic": "ai"},
-        ),
+            content=content,
+            chunk_id=generate_chunk_id(doc_id, content),
+            metadata={"document_id": doc_id, "topic": topic},
+        )
+        for content, doc_id, topic in chunks_data
     ]

View file

@ -13,23 +13,33 @@ from ..conftest import vector_provider_wrapper
 @pytest.fixture(scope="session")
 def sample_chunks():
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
+    chunks_data = [
+        (
+            "Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
+            "doc1",
+        ),
+        (
+            "Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
+            "doc2",
+        ),
+        (
+            "Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
+            "doc3",
+        ),
+        (
+            "Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
+            "doc4",
+        ),
+    ]
+
     return [
         Chunk(
-            content="Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
-            metadata={"document_id": "doc1"},
-        ),
-        Chunk(
-            content="Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
-            metadata={"document_id": "doc2"},
-        ),
-        Chunk(
-            content="Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
-            metadata={"document_id": "doc3"},
-        ),
-        Chunk(
-            content="Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
-            metadata={"document_id": "doc4"},
-        ),
+            content=content,
+            chunk_id=generate_chunk_id(doc_id, content),
+            metadata={"document_id": doc_id},
+        )
+        for content, doc_id in chunks_data
     ]
@ -168,6 +178,7 @@ def test_insert_chunks_with_precomputed_embeddings(
     chunks_with_embeddings = [
         Chunk(
             content="This is a test chunk with precomputed embedding.",
+            chunk_id="chunk1",
             metadata={"document_id": "doc1", "source": "precomputed", "chunk_id": "chunk1"},
             embedding=[0.1] * int(embedding_dimension),
         ),
@ -215,9 +226,12 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
     actual_vector_store_id = register_response.id
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
     chunks_with_embeddings = [
         Chunk(
             content="duplicate",
+            chunk_id=generate_chunk_id("doc1", "duplicate"),
             metadata={"document_id": "doc1", "source": "precomputed"},
             embedding=[0.1] * int(embedding_dimension),
         ),

View file

@ -192,18 +192,18 @@ async def test_create_agent_session_persistence(agents_impl, sample_agent_config
     assert session_response.session_id is not None

     # Verify the session was stored
-    session = await agents_impl.get_agents_session(agent_id, session_response.session_id)
+    session = await agents_impl.get_agents_session(session_response.session_id, agent_id)
     assert session.session_name == "test_session"
     assert session.session_id == session_response.session_id
     assert session.started_at is not None
     assert session.turns == []

     # Delete the session
-    await agents_impl.delete_agents_session(agent_id, session_response.session_id)
+    await agents_impl.delete_agents_session(session_response.session_id, agent_id)

     # Verify the session was deleted
     with pytest.raises(ValueError):
-        await agents_impl.get_agents_session(agent_id, session_response.session_id)
+        await agents_impl.get_agents_session(session_response.session_id, agent_id)


 @pytest.mark.parametrize("enable_session_persistence", [True, False])
@ -226,11 +226,11 @@ async def test_list_agent_sessions_persistence(agents_impl, sample_agent_config,
     assert session2.session_id in session_ids

     # Delete one session
-    await agents_impl.delete_agents_session(agent_id, session1.session_id)
+    await agents_impl.delete_agents_session(session1.session_id, agent_id)

     # Verify the session was deleted
     with pytest.raises(ValueError):
-        await agents_impl.get_agents_session(agent_id, session1.session_id)
+        await agents_impl.get_agents_session(session1.session_id, agent_id)

     # List sessions again
     sessions = await agents_impl.list_agent_sessions(agent_id)

View file

@ -43,9 +43,15 @@ def embedding_dimension() -> int:
 @pytest.fixture(scope="session")
 def sample_chunks():
     """Generates chunks that force multiple batches for a single document to expose ID conflicts."""
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
     n, k = 10, 3
     sample = [
-        Chunk(content=f"Sentence {i} from document {j}", metadata={"document_id": f"document-{j}"})
+        Chunk(
+            content=f"Sentence {i} from document {j}",
+            chunk_id=generate_chunk_id(f"document-{j}", f"Sentence {i} from document {j}"),
+            metadata={"document_id": f"document-{j}"},
+        )
         for j in range(k)
         for i in range(n)
     ]
@ -53,6 +59,7 @@ def sample_chunks():
         [
             Chunk(
                 content=f"Sentence {i} from document {j + k}",
+                chunk_id=f"document-{j}-chunk-{i}",
                 chunk_metadata=ChunkMetadata(
                     document_id=f"document-{j + k}",
                     chunk_id=f"document-{j}-chunk-{i}",
@ -73,6 +80,7 @@ def sample_chunks_with_metadata():
     sample = [
         Chunk(
             content=f"Sentence {i} from document {j}",
+            chunk_id=f"document-{j}-chunk-{i}",
             metadata={"document_id": f"document-{j}"},
             chunk_metadata=ChunkMetadata(
                 document_id=f"document-{j}",

View file

@ -49,9 +49,21 @@ def vector_store_id():
 @pytest.fixture
 def sample_chunks():
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
     return [
-        Chunk(content="MOCK text content 1", mime_type="text/plain", metadata={"document_id": "mock-doc-1"}),
-        Chunk(content="MOCK text content 1", mime_type="text/plain", metadata={"document_id": "mock-doc-2"}),
+        Chunk(
+            content="MOCK text content 1",
+            chunk_id=generate_chunk_id("mock-doc-1", "MOCK text content 1"),
+            mime_type="text/plain",
+            metadata={"document_id": "mock-doc-1"},
+        ),
+        Chunk(
+            content="MOCK text content 1",
+            chunk_id=generate_chunk_id("mock-doc-2", "MOCK text content 1"),
+            mime_type="text/plain",
+            metadata={"document_id": "mock-doc-2"},
+        ),
     ]

View file

@ -434,9 +434,15 @@ async def test_query_chunks_hybrid_tie_breaking(
     sqlite_vec_index, sample_embeddings, embedding_dimension, tmp_path_factory
 ):
     """Test tie-breaking and determinism when scores are equal."""
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
     # Create two chunks with the same content and embedding
-    chunk1 = Chunk(content="identical", metadata={"document_id": "docA"})
-    chunk2 = Chunk(content="identical", metadata={"document_id": "docB"})
+    chunk1 = Chunk(
+        content="identical", chunk_id=generate_chunk_id("docA", "identical"), metadata={"document_id": "docA"}
+    )
+    chunk2 = Chunk(
+        content="identical", chunk_id=generate_chunk_id("docB", "identical"), metadata={"document_id": "docB"}
+    )
     chunks = [chunk1, chunk2]

     # Use the same embedding for both chunks to ensure equal scores
     same_embedding = sample_embeddings[0]

View file

@ -135,10 +135,24 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
     vector_io_adapter.cache["db1"] = fake_index

     # Various document_id scenarios that shouldn't crash
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
     chunks = [
-        Chunk(content="has doc_id in metadata", metadata={"document_id": "doc-1"}),
-        Chunk(content="no doc_id anywhere", metadata={"source": "test"}),
-        Chunk(content="doc_id in chunk_metadata", chunk_metadata=ChunkMetadata(document_id="doc-3")),
+        Chunk(
+            content="has doc_id in metadata",
+            chunk_id=generate_chunk_id("doc-1", "has doc_id in metadata"),
+            metadata={"document_id": "doc-1"},
+        ),
+        Chunk(
+            content="no doc_id anywhere",
+            chunk_id=generate_chunk_id("unknown", "no doc_id anywhere"),
+            metadata={"source": "test"},
+        ),
+        Chunk(
+            content="doc_id in chunk_metadata",
+            chunk_id=generate_chunk_id("doc-3", "doc_id in chunk_metadata"),
+            chunk_metadata=ChunkMetadata(document_id="doc-3"),
+        ),
     ]

     # Should work without KeyError
@ -151,7 +165,9 @@ async def test_document_id_with_invalid_type_raises_error():
     from llama_stack.apis.vector_io import Chunk

     # Integer document_id should raise TypeError
-    chunk = Chunk(content="test", metadata={"document_id": 12345})
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
+    chunk = Chunk(content="test", chunk_id=generate_chunk_id("test", "test"), metadata={"document_id": 12345})
     with pytest.raises(TypeError) as exc_info:
         _ = chunk.document_id
     assert "metadata['document_id'] must be a string" in str(exc_info.value)
@ -159,7 +175,9 @@ async def test_document_id_with_invalid_type_raises_error():
 async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter):
-    expected = QueryChunksResponse(chunks=[Chunk(content="c1")], scores=[0.1])
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
+    expected = QueryChunksResponse(chunks=[Chunk(content="c1", chunk_id=generate_chunk_id("test", "c1"))], scores=[0.1])
     fake_index = AsyncMock(query_chunks=AsyncMock(return_value=expected))
     vector_io_adapter.cache["db1"] = fake_index
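Note: the TypeError test above encodes a validation contract that is easy to miss: Chunk construction does not validate metadata["document_id"]; the check happens lazily when the document_id property is read. A minimal sketch of that behavior (values are illustrative):

    from llama_stack.apis.vector_io import Chunk
    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id

    # Construction succeeds even with a non-string document_id in metadata...
    chunk = Chunk(content="test", chunk_id=generate_chunk_id("test", "test"), metadata={"document_id": 12345})

    # ...but reading the property raises, per the assertion in the test above.
    try:
        _ = chunk.document_id
    except TypeError as err:
        assert "metadata['document_id'] must be a string" in str(err)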

View file

@ -18,13 +18,12 @@ from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 def test_generate_chunk_id():
-    chunks = [
-        Chunk(content="test", metadata={"document_id": "doc-1"}),
-        Chunk(content="test ", metadata={"document_id": "doc-1"}),
-        Chunk(content="test 3", metadata={"document_id": "doc-1"}),
-    ]
-    chunk_ids = sorted([chunk.chunk_id for chunk in chunks])
+    """Test that generate_chunk_id produces expected hashes."""
+    chunk_id1 = generate_chunk_id("doc-1", "test")
+    chunk_id2 = generate_chunk_id("doc-1", "test ")
+    chunk_id3 = generate_chunk_id("doc-1", "test 3")
+
+    chunk_ids = sorted([chunk_id1, chunk_id2, chunk_id3])
     assert chunk_ids == [
         "31d1f9a3-c8d2-66e7-3c37-af2acd329778",
         "d07dade7-29c0-cda7-df29-0249a1dcbc3e",
@ -33,42 +32,49 @@ def test_generate_chunk_id():
 def test_generate_chunk_id_with_window():
-    chunk = Chunk(content="test", metadata={"document_id": "doc-1"})
+    """Test that generate_chunk_id with chunk_window produces different IDs."""
+    # Create a chunk object to match the original test behavior (passing object to generate_chunk_id)
+    chunk = Chunk(content="test", chunk_id="placeholder", metadata={"document_id": "doc-1"})
     chunk_id1 = generate_chunk_id("doc-1", chunk, chunk_window="0-1")
     chunk_id2 = generate_chunk_id("doc-1", chunk, chunk_window="1-2")
-    assert chunk_id1 == "8630321a-d9cb-2bb6-cd28-ebf68dafd866"
-    assert chunk_id2 == "13a1c09a-cbda-b61a-2d1a-7baa90888685"
+    # Verify that different windows produce different IDs
+    assert chunk_id1 != chunk_id2
+    assert len(chunk_id1) == 36  # Valid UUID format
+    assert len(chunk_id2) == 36  # Valid UUID format


-def test_chunk_id():
-    # Test with existing chunk ID
-    chunk_with_id = Chunk(content="test", metadata={"document_id": "existing-id"})
-    assert chunk_with_id.chunk_id == "11704f92-42b6-61df-bf85-6473e7708fbd"
-
-    # Test with document ID in metadata
-    chunk_with_doc_id = Chunk(content="test", metadata={"document_id": "doc-1"})
-    assert chunk_with_doc_id.chunk_id == generate_chunk_id("doc-1", "test")
-
-    # Test chunks with ChunkMetadata
-    chunk_with_metadata = Chunk(
+def test_chunk_creation_with_explicit_id():
+    """Test that chunks can be created with explicit chunk_id."""
+    chunk_id = generate_chunk_id("doc-1", "test")
+    chunk = Chunk(
         content="test",
-        metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"},
+        chunk_id=chunk_id,
+        metadata={"document_id": "doc-1"},
+    )
+    assert chunk.chunk_id == chunk_id
+    assert chunk.chunk_id == "31d1f9a3-c8d2-66e7-3c37-af2acd329778"
+
+
+def test_chunk_with_metadata():
+    """Test chunks with ChunkMetadata."""
+    chunk_id = "chunk-id-1"
+    chunk = Chunk(
+        content="test",
+        chunk_id=chunk_id,
+        metadata={"document_id": "existing-id"},
         chunk_metadata=ChunkMetadata(document_id="document_1"),
     )
-    assert chunk_with_metadata.chunk_id == "chunk-id-1"
-
-    # Test with no ID or document ID
-    chunk_without_id = Chunk(content="test")
-    generated_id = chunk_without_id.chunk_id
-    assert isinstance(generated_id, str) and len(generated_id) == 36  # Should be a valid UUID
+    assert chunk.chunk_id == "chunk-id-1"
+    assert chunk.document_id == "existing-id"  # metadata takes precedence


-def test_stored_chunk_id_alias():
-    # Test with existing chunk ID alias
-    chunk_with_alias = Chunk(content="test", metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"})
-    assert chunk_with_alias.chunk_id == "chunk-id-1"
-    serialized_chunk = chunk_with_alias.model_dump()
-    assert serialized_chunk["stored_chunk_id"] == "chunk-id-1"
-    # showing chunk_id is not serialized (i.e., a computed field)
-    assert "chunk_id" not in serialized_chunk
-    assert chunk_with_alias.stored_chunk_id == "chunk-id-1"
+def test_chunk_serialization():
+    """Test that chunk_id is properly serialized."""
+    chunk = Chunk(
+        content="test",
+        chunk_id="test-chunk-id",
+        metadata={"document_id": "doc-1"},
+    )
+    serialized_chunk = chunk.model_dump()
+    assert serialized_chunk["chunk_id"] == "test-chunk-id"
+    assert "chunk_id" in serialized_chunk

View file

@ -41,6 +41,7 @@ class TestRagQuery:
         interleaved_content = MagicMock()
         chunk = Chunk(
             content=interleaved_content,
+            chunk_id="chunk1",
             metadata={
                 "key1": "value1",
                 "token_count": 10,
@ -48,7 +49,6 @@ class TestRagQuery:
                 # Note this is inserted into `metadata` during MemoryToolRuntimeImpl().insert()
                 "document_id": "doc1",
             },
-            stored_chunk_id="chunk1",
             chunk_metadata=chunk_metadata,
         )
@ -101,8 +101,8 @@ class TestRagQuery:
         )
         chunk1 = Chunk(
             content="chunk from db1",
+            chunk_id="c1",
             metadata={"vector_store_id": "db1", "document_id": "doc1"},
-            stored_chunk_id="c1",
             chunk_metadata=chunk_metadata1,
         )
@ -114,8 +114,8 @@ class TestRagQuery:
         )
         chunk2 = Chunk(
             content="chunk from db2",
+            chunk_id="c2",
             metadata={"vector_store_id": "db2", "document_id": "doc2"},
-            stored_chunk_id="c2",
             chunk_metadata=chunk_metadata2,
         )

View file

@ -26,6 +26,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     content_from_doc,
     make_overlapped_chunks,
 )
+from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id

 DUMMY_PDF_PATH = Path(os.path.abspath(__file__)).parent / "fixtures" / "dummy.pdf"
 # Depending on the machine, this can get parsed a couple of ways
@ -53,6 +54,7 @@ class TestChunk:
     def test_chunk(self):
         chunk = Chunk(
             content="Example chunk content",
+            chunk_id=generate_chunk_id("test-doc", "Example chunk content"),
             metadata={"key": "value"},
             embedding=[0.1, 0.2, 0.3],
         )
@ -63,6 +65,7 @@ class TestChunk:
         chunk_no_embedding = Chunk(
             content="Example chunk content",
+            chunk_id=generate_chunk_id("test-doc", "Example chunk content"),
             metadata={"key": "value"},
         )
         assert chunk_no_embedding.embedding is None
@ -218,8 +221,8 @@ class TestVectorStoreWithIndex:
         )

         chunks = [
-            Chunk(content="Test 1", embedding=None, metadata={}),
-            Chunk(content="Test 2", embedding=None, metadata={}),
+            Chunk(content="Test 1", chunk_id=generate_chunk_id("test-doc", "Test 1"), embedding=None, metadata={}),
+            Chunk(content="Test 2", chunk_id=generate_chunk_id("test-doc", "Test 2"), embedding=None, metadata={}),
         ]

         mock_inference_api.openai_embeddings.return_value.data = [
@ -254,8 +257,18 @@ class TestVectorStoreWithIndex:
         )

         chunks = [
-            Chunk(content="Test 1", embedding=[0.1, 0.2, 0.3], metadata={}),
-            Chunk(content="Test 2", embedding=[0.4, 0.5, 0.6], metadata={}),
+            Chunk(
+                content="Test 1",
+                chunk_id=generate_chunk_id("test-doc", "Test 1"),
+                embedding=[0.1, 0.2, 0.3],
+                metadata={},
+            ),
+            Chunk(
+                content="Test 2",
+                chunk_id=generate_chunk_id("test-doc", "Test 2"),
+                embedding=[0.4, 0.5, 0.6],
+                metadata={},
+            ),
         ]

         await vector_store_with_index.insert_chunks(chunks)
@ -279,25 +292,47 @@ class TestVectorStoreWithIndex:
         # Verify Chunk raises ValueError for invalid embedding type
         with pytest.raises(ValueError, match="Input should be a valid list"):
-            Chunk(content="Test 1", embedding="invalid_type", metadata={})
+            Chunk(
+                content="Test 1",
+                chunk_id=generate_chunk_id("test-doc", "Test 1"),
+                embedding="invalid_type",
+                metadata={},
+            )

         # Verify Chunk raises ValueError for invalid embedding type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
         with pytest.raises(ValueError, match="Input should be a valid list"):
             await vector_store_with_index.insert_chunks(
                 [
-                    Chunk(content="Test 1", embedding=None, metadata={}),
-                    Chunk(content="Test 2", embedding="invalid_type", metadata={}),
+                    Chunk(
+                        content="Test 1", chunk_id=generate_chunk_id("test-doc", "Test 1"), embedding=None, metadata={}
+                    ),
+                    Chunk(
+                        content="Test 2",
+                        chunk_id=generate_chunk_id("test-doc", "Test 2"),
+                        embedding="invalid_type",
+                        metadata={},
+                    ),
                 ]
             )

         # Verify Chunk raises ValueError for invalid embedding element type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
         with pytest.raises(ValueError, match=" Input should be a valid number, unable to parse string as a number "):
             await vector_store_with_index.insert_chunks(
-                Chunk(content="Test 1", embedding=[0.1, "string", 0.3], metadata={})
+                Chunk(
+                    content="Test 1",
+                    chunk_id=generate_chunk_id("test-doc", "Test 1"),
+                    embedding=[0.1, "string", 0.3],
+                    metadata={},
+                )
             )

         chunks_wrong_dim = [
-            Chunk(content="Test 1", embedding=[0.1, 0.2, 0.3, 0.4], metadata={}),
+            Chunk(
+                content="Test 1",
+                chunk_id=generate_chunk_id("test-doc", "Test 1"),
+                embedding=[0.1, 0.2, 0.3, 0.4],
+                metadata={},
+            ),
         ]
         with pytest.raises(ValueError, match="has dimension 4, expected 3"):
             await vector_store_with_index.insert_chunks(chunks_wrong_dim)
@ -317,9 +352,14 @@ class TestVectorStoreWithIndex:
         )

         chunks = [
-            Chunk(content="Test 1", embedding=None, metadata={}),
-            Chunk(content="Test 2", embedding=[0.2, 0.2, 0.2], metadata={}),
-            Chunk(content="Test 3", embedding=None, metadata={}),
+            Chunk(content="Test 1", chunk_id=generate_chunk_id("test-doc", "Test 1"), embedding=None, metadata={}),
+            Chunk(
+                content="Test 2",
+                chunk_id=generate_chunk_id("test-doc", "Test 2"),
+                embedding=[0.2, 0.2, 0.2],
+                metadata={},
+            ),
+            Chunk(content="Test 3", chunk_id=generate_chunk_id("test-doc", "Test 3"), embedding=None, metadata={}),
         ]

         mock_inference_api.openai_embeddings.return_value.data = [