Merge remote-tracking branch 'upstream/main' into feat/gunicorn-production-server

Roy Belio 2025-10-30 16:43:34 +02:00
commit b060f73e6d
70 changed files with 46290 additions and 1133 deletions


@@ -43,6 +43,9 @@ jobs:
cache: 'npm'
cache-dependency-path: 'src/llama_stack/ui/'
- name: Set up uv
uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
- name: Install npm dependencies
run: npm ci
working-directory: src/llama_stack/ui
@@ -52,7 +55,7 @@ jobs:
uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
continue-on-error: true
env:
SKIP: no-commit-to-branch
SKIP: no-commit-to-branch,mypy
RUFF_OUTPUT_FORMAT: github
- name: Check pre-commit results
@@ -109,3 +112,16 @@ jobs:
echo "$unstaged_files"
exit 1
fi
- name: Sync dev + type_checking dependencies
run: uv sync --group dev --group type_checking
- name: Run mypy (full type_checking)
run: |
set +e
uv run --group dev --group type_checking mypy
status=$?
if [ $status -ne 0 ]; then
echo "::error::Full mypy failed. Reproduce locally with 'uv run pre-commit run mypy-full --hook-stage manual --all-files'."
fi
exit $status

.gitignore

@@ -32,3 +32,6 @@ CLAUDE.md
docs/.docusaurus/
docs/node_modules/
docs/static/imported-files/
docs/docs/api-deprecated/
docs/docs/api-experimental/
docs/docs/api/


@@ -57,17 +57,27 @@ repos:
hooks:
- id: uv-lock
- repo: local
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.18.2
hooks:
- id: mypy
name: mypy
additional_dependencies:
- uv==0.7.8
entry: uv run --group dev --group type_checking mypy
language: python
types: [python]
- uv==0.6.2
- pytest
- rich
- types-requests
- pydantic
- httpx
pass_filenames: false
require_serial: true
- repo: local
hooks:
- id: mypy-full
name: mypy (full type_checking)
entry: uv run --group dev --group type_checking mypy
language: system
pass_filenames: false
stages: [manual]
# - repo: https://github.com/tcort/markdown-link-check
# rev: v3.11.2
@@ -152,7 +162,6 @@ repos:
files: ^src/llama_stack/ui/.*\.(ts|tsx)$
pass_filenames: false
require_serial: true
- id: check-log-usage
name: Ensure 'llama_stack.log' usage for logging
entry: bash
@@ -171,7 +180,23 @@ repos:
exit 1
fi
exit 0
- id: fips-compliance
name: Ensure llama-stack remains FIPS compliant
entry: bash
language: system
types: [python]
pass_filenames: true
exclude: '^tests/.*$' # Exclude test dir as some safety tests used MD5
args:
- -c
- |
grep -EnH '^[^#]*\b(md5|sha1|uuid3|uuid5)\b' "$@" && {
echo;
echo "❌ Do not use any of the following functions: hashlib.md5, hashlib.sha1, uuid.uuid3, uuid.uuid5"
echo " These functions are not FIPS-compliant"
echo;
exit 1;
} || true
ci:
autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate


@@ -61,6 +61,18 @@ uv run pre-commit run --all-files -v
The `-v` (verbose) parameter is optional but often helpful for getting more information about any issues that the pre-commit checks identify.
To run the expanded mypy configuration that CI enforces, use:
```bash
uv run pre-commit run mypy-full --hook-stage manual --all-files
```
or invoke mypy directly with all optional dependencies:
```bash
uv run --group dev --group type_checking mypy
```
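If you only want to type-check the part of the tree you are working on, you can also pass a path to mypy directly; the path below is only an example, substitute the package you are editing:
```bash
# Run mypy with the optional dependency groups against a single package (example path)
uv run --group dev --group type_checking mypy src/llama_stack/core
```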
```{caution}
Before pushing your changes, make sure that the pre-commit hooks have passed successfully.
```


@@ -1,610 +0,0 @@
# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json
organization:
# Name of your organization or company, used to determine the name of the client
# and headings.
name: llama-stack-client
docs: https://llama-stack.readthedocs.io/en/latest/
contact: llamastack@meta.com
security:
- {}
- BearerAuth: []
security_schemes:
BearerAuth:
type: http
scheme: bearer
# `targets` define the output targets and their customization options, such as
# whether to emit the Node SDK and what its package name should be.
targets:
node:
package_name: llama-stack-client
production_repo: llamastack/llama-stack-client-typescript
publish:
npm: false
python:
package_name: llama_stack_client
production_repo: llamastack/llama-stack-client-python
options:
use_uv: true
publish:
pypi: true
project_name: llama_stack_client
kotlin:
reverse_domain: com.llama_stack_client.api
production_repo: null
publish:
maven: false
go:
package_name: llama-stack-client
production_repo: llamastack/llama-stack-client-go
options:
enable_v2: true
back_compat_use_shared_package: false
# `client_settings` define settings for the API client, such as extra constructor
# arguments (used for authentication), retry behavior, idempotency, etc.
client_settings:
default_env_prefix: LLAMA_STACK_CLIENT
opts:
api_key:
type: string
read_env: LLAMA_STACK_CLIENT_API_KEY
auth: { security_scheme: BearerAuth }
nullable: true
# `environments` are a map of the name of the environment (e.g. "sandbox",
# "production") to the corresponding url to use.
environments:
production: http://any-hosted-llama-stack.com
# `pagination` defines [pagination schemes] which provides a template to match
# endpoints and generate next-page and auto-pagination helpers in the SDKs.
pagination:
- name: datasets_iterrows
type: offset
request:
dataset_id:
type: string
start_index:
type: integer
x-stainless-pagination-property:
purpose: offset_count_param
limit:
type: integer
response:
data:
type: array
items:
type: object
next_index:
type: integer
x-stainless-pagination-property:
purpose: offset_count_start_field
- name: openai_cursor_page
type: cursor
request:
limit:
type: integer
after:
type: string
x-stainless-pagination-property:
purpose: next_cursor_param
response:
data:
type: array
items: {}
has_more:
type: boolean
last_id:
type: string
x-stainless-pagination-property:
purpose: next_cursor_field
# `resources` define the structure and organization for your API, such as how
# methods and models are grouped together and accessed. See the [configuration
# guide] for more information.
#
# [configuration guide]:
# https://app.stainlessapi.com/docs/guides/configure#resources
resources:
$shared:
models:
agent_config: AgentConfig
interleaved_content_item: InterleavedContentItem
interleaved_content: InterleavedContent
param_type: ParamType
safety_violation: SafetyViolation
sampling_params: SamplingParams
scoring_result: ScoringResult
message: Message
user_message: UserMessage
completion_message: CompletionMessage
tool_response_message: ToolResponseMessage
system_message: SystemMessage
tool_call: ToolCall
query_result: RAGQueryResult
document: RAGDocument
query_config: RAGQueryConfig
response_format: ResponseFormat
toolgroups:
models:
tool_group: ToolGroup
list_tool_groups_response: ListToolGroupsResponse
methods:
register: post /v1/toolgroups
get: get /v1/toolgroups/{toolgroup_id}
list: get /v1/toolgroups
unregister: delete /v1/toolgroups/{toolgroup_id}
tools:
methods:
get: get /v1/tools/{tool_name}
list:
endpoint: get /v1/tools
paginated: false
tool_runtime:
models:
tool_def: ToolDef
tool_invocation_result: ToolInvocationResult
methods:
list_tools:
endpoint: get /v1/tool-runtime/list-tools
paginated: false
invoke_tool: post /v1/tool-runtime/invoke
subresources:
rag_tool:
methods:
insert: post /v1/tool-runtime/rag-tool/insert
query: post /v1/tool-runtime/rag-tool/query
responses:
models:
response_object_stream: OpenAIResponseObjectStream
response_object: OpenAIResponseObject
methods:
create:
type: http
endpoint: post /v1/responses
streaming:
stream_event_model: responses.response_object_stream
param_discriminator: stream
retrieve: get /v1/responses/{response_id}
list:
type: http
endpoint: get /v1/responses
delete:
type: http
endpoint: delete /v1/responses/{response_id}
subresources:
input_items:
methods:
list:
type: http
endpoint: get /v1/responses/{response_id}/input_items
conversations:
models:
conversation_object: Conversation
methods:
create:
type: http
endpoint: post /v1/conversations
retrieve: get /v1/conversations/{conversation_id}
update:
type: http
endpoint: post /v1/conversations/{conversation_id}
delete:
type: http
endpoint: delete /v1/conversations/{conversation_id}
subresources:
items:
methods:
get:
type: http
endpoint: get /v1/conversations/{conversation_id}/items/{item_id}
list:
type: http
endpoint: get /v1/conversations/{conversation_id}/items
create:
type: http
endpoint: post /v1/conversations/{conversation_id}/items
inspect:
models:
healthInfo: HealthInfo
providerInfo: ProviderInfo
routeInfo: RouteInfo
versionInfo: VersionInfo
methods:
health: get /v1/health
version: get /v1/version
embeddings:
models:
create_embeddings_response: OpenAIEmbeddingsResponse
methods:
create: post /v1/embeddings
chat:
models:
chat_completion_chunk: OpenAIChatCompletionChunk
subresources:
completions:
methods:
create:
type: http
endpoint: post /v1/chat/completions
streaming:
stream_event_model: chat.chat_completion_chunk
param_discriminator: stream
list:
type: http
endpoint: get /v1/chat/completions
retrieve:
type: http
endpoint: get /v1/chat/completions/{completion_id}
completions:
methods:
create:
type: http
endpoint: post /v1/completions
streaming:
param_discriminator: stream
vector_io:
models:
queryChunksResponse: QueryChunksResponse
methods:
insert: post /v1/vector-io/insert
query: post /v1/vector-io/query
vector_stores:
models:
vector_store: VectorStoreObject
list_vector_stores_response: VectorStoreListResponse
vector_store_delete_response: VectorStoreDeleteResponse
vector_store_search_response: VectorStoreSearchResponsePage
methods:
create: post /v1/vector_stores
list:
endpoint: get /v1/vector_stores
retrieve: get /v1/vector_stores/{vector_store_id}
update: post /v1/vector_stores/{vector_store_id}
delete: delete /v1/vector_stores/{vector_store_id}
search: post /v1/vector_stores/{vector_store_id}/search
subresources:
files:
models:
vector_store_file: VectorStoreFileObject
methods:
list: get /v1/vector_stores/{vector_store_id}/files
retrieve: get /v1/vector_stores/{vector_store_id}/files/{file_id}
update: post /v1/vector_stores/{vector_store_id}/files/{file_id}
delete: delete /v1/vector_stores/{vector_store_id}/files/{file_id}
create: post /v1/vector_stores/{vector_store_id}/files
content: get /v1/vector_stores/{vector_store_id}/files/{file_id}/content
file_batches:
models:
vector_store_file_batches: VectorStoreFileBatchObject
list_vector_store_files_in_batch_response: VectorStoreFilesListInBatchResponse
methods:
create: post /v1/vector_stores/{vector_store_id}/file_batches
retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}
list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files
cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel
models:
models:
model: Model
list_models_response: ListModelsResponse
methods:
retrieve: get /v1/models/{model_id}
list:
endpoint: get /v1/models
paginated: false
register: post /v1/models
unregister: delete /v1/models/{model_id}
subresources:
openai:
methods:
list:
endpoint: get /v1/models
paginated: false
providers:
models:
list_providers_response: ListProvidersResponse
methods:
list:
endpoint: get /v1/providers
paginated: false
retrieve: get /v1/providers/{provider_id}
routes:
models:
list_routes_response: ListRoutesResponse
methods:
list:
endpoint: get /v1/inspect/routes
paginated: false
moderations:
models:
create_response: ModerationObject
methods:
create: post /v1/moderations
safety:
models:
run_shield_response: RunShieldResponse
methods:
run_shield: post /v1/safety/run-shield
shields:
models:
shield: Shield
list_shields_response: ListShieldsResponse
methods:
retrieve: get /v1/shields/{identifier}
list:
endpoint: get /v1/shields
paginated: false
register: post /v1/shields
delete: delete /v1/shields/{identifier}
synthetic_data_generation:
models:
syntheticDataGenerationResponse: SyntheticDataGenerationResponse
methods:
generate: post /v1/synthetic-data-generation/generate
telemetry:
models:
span_with_status: SpanWithStatus
trace: Trace
query_spans_response: QuerySpansResponse
event: Event
query_condition: QueryCondition
methods:
query_traces:
endpoint: post /v1alpha/telemetry/traces
skip_test_reason: 'unsupported query params in java / kotlin'
get_span_tree: post /v1alpha/telemetry/spans/{span_id}/tree
query_spans:
endpoint: post /v1alpha/telemetry/spans
skip_test_reason: 'unsupported query params in java / kotlin'
query_metrics:
endpoint: post /v1alpha/telemetry/metrics/{metric_name}
skip_test_reason: 'unsupported query params in java / kotlin'
# log_event: post /v1alpha/telemetry/events
save_spans_to_dataset: post /v1alpha/telemetry/spans/export
get_span: get /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}
get_trace: get /v1alpha/telemetry/traces/{trace_id}
scoring:
methods:
score: post /v1/scoring/score
score_batch: post /v1/scoring/score-batch
scoring_functions:
methods:
retrieve: get /v1/scoring-functions/{scoring_fn_id}
list:
endpoint: get /v1/scoring-functions
paginated: false
register: post /v1/scoring-functions
models:
scoring_fn: ScoringFn
scoring_fn_params: ScoringFnParams
list_scoring_functions_response: ListScoringFunctionsResponse
benchmarks:
methods:
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
list:
endpoint: get /v1alpha/eval/benchmarks
paginated: false
register: post /v1alpha/eval/benchmarks
models:
benchmark: Benchmark
list_benchmarks_response: ListBenchmarksResponse
files:
methods:
create: post /v1/files
list: get /v1/files
retrieve: get /v1/files/{file_id}
delete: delete /v1/files/{file_id}
content: get /v1/files/{file_id}/content
models:
file: OpenAIFileObject
list_files_response: ListOpenAIFileResponse
delete_file_response: OpenAIFileDeleteResponse
alpha:
subresources:
inference:
methods:
rerank: post /v1alpha/inference/rerank
post_training:
models:
algorithm_config: AlgorithmConfig
post_training_job: PostTrainingJob
list_post_training_jobs_response: ListPostTrainingJobsResponse
methods:
preference_optimize: post /v1alpha/post-training/preference-optimize
supervised_fine_tune: post /v1alpha/post-training/supervised-fine-tune
subresources:
job:
methods:
artifacts: get /v1alpha/post-training/job/artifacts
cancel: post /v1alpha/post-training/job/cancel
status: get /v1alpha/post-training/job/status
list:
endpoint: get /v1alpha/post-training/jobs
paginated: false
eval:
methods:
evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
subresources:
jobs:
methods:
cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
models:
evaluate_response: EvaluateResponse
benchmark_config: BenchmarkConfig
job: Job
agents:
methods:
create: post /v1alpha/agents
list: get /v1alpha/agents
retrieve: get /v1alpha/agents/{agent_id}
delete: delete /v1alpha/agents/{agent_id}
models:
inference_step: InferenceStep
tool_execution_step: ToolExecutionStep
tool_response: ToolResponse
shield_call_step: ShieldCallStep
memory_retrieval_step: MemoryRetrievalStep
subresources:
session:
models:
session: Session
methods:
list: get /v1alpha/agents/{agent_id}/sessions
create: post /v1alpha/agents/{agent_id}/session
delete: delete /v1alpha/agents/{agent_id}/session/{session_id}
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}
steps:
methods:
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}
turn:
models:
turn: Turn
turn_response_event: AgentTurnResponseEvent
agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk
methods:
create:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}
resume:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
beta:
subresources:
datasets:
models:
list_datasets_response: ListDatasetsResponse
methods:
register: post /v1beta/datasets
retrieve: get /v1beta/datasets/{dataset_id}
list:
endpoint: get /v1beta/datasets
paginated: false
unregister: delete /v1beta/datasets/{dataset_id}
iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
settings:
license: MIT
unwrap_response_fields: [ data ]
openapi:
transformations:
- command: renameValue
reason: pydantic reserved name
args:
filter:
only:
- '$.components.schemas.InferenceStep.properties.model_response'
rename:
python:
property_name: 'inference_model_response'
# - command: renameValue
# reason: pydantic reserved name
# args:
# filter:
# only:
# - '$.components.schemas.Model.properties.model_type'
# rename:
# python:
# property_name: 'type'
- command: mergeObject
reason: Better return_type using enum
args:
target:
- '$.components.schemas'
object:
ReturnType:
additionalProperties: false
properties:
type:
enum:
- string
- number
- boolean
- array
- object
- json
- union
- chat_completion_input
- completion_input
- agent_turn_input
required:
- type
type: object
- command: replaceProperties
reason: Replace return type properties with better model (see above)
args:
filter:
only:
- '$.components.schemas.ScoringFn.properties.return_type'
- '$.components.schemas.RegisterScoringFunctionRequest.properties.return_type'
value:
$ref: '#/components/schemas/ReturnType'
- command: oneOfToAnyOf
reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants
- reason: For better names
command: extractToRefs
args:
ref:
target: '$.components.schemas.ToolCallDelta.properties.tool_call'
name: '#/components/schemas/ToolCallOrString'
# `readme` is used to configure the code snippets that will be rendered in the
# README.md of various SDKs. In particular, you can change the `headline`
# snippet's endpoint and the arguments to call it with.
readme:
example_requests:
default:
type: request
endpoint: post /v1/chat/completions
params: &ref_0 {}
headline:
type: request
endpoint: post /v1/models
params: *ref_0
pagination:
type: request
endpoint: post /v1/chat/completions
params: {}


@@ -15,6 +15,141 @@ info:
servers:
- url: http://any-hosted-llama-stack.com
paths:
/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
required: true
schema:
type: integer
deprecated: false
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: false
/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: false
/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: false
/v1/chat/completions:
get:
responses:
@@ -4212,6 +4347,331 @@ components:
title: Error
description: >-
Error response from the API. Roughly follows RFC 7807.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
Order:
type: string
enum:
@@ -10258,6 +10718,10 @@ components:
description: >-
The content of the chunk, which can be interleaved text, images, or other
types.
chunk_id:
type: string
description: >-
Unique identifier for the chunk. Must be provided explicitly.
metadata:
type: object
additionalProperties:
@@ -10278,10 +10742,6 @@ components:
description: >-
Optional embedding for the chunk. If not provided, it will be computed
later.
stored_chunk_id:
type: string
description: >-
The chunk ID that is stored in the vector database. Used for backend functionality.
chunk_metadata:
$ref: '#/components/schemas/ChunkMetadata'
description: >-
@@ -10290,6 +10750,7 @@ components:
additionalProperties: false
required:
- content
- chunk_id
- metadata
title: Chunk
description: >-
@@ -13527,6 +13988,19 @@ tags:
description: >-
APIs for creating and interacting with agentic systems.
x-displayName: Agents
- name: Batches
description: >-
The API is designed to allow use of openai client libraries for seamless integration.
This API provides the following extensions:
- idempotent batch creation
Note: This API is currently under active development and may undergo changes.
x-displayName: >-
The Batches API enables efficient processing of multiple requests in a single
operation, particularly useful for processing large datasets, batch evaluation
workflows, and cost-effective inference at scale.
- name: Benchmarks
description: ''
- name: Conversations
@@ -13601,6 +14075,7 @@ x-tagGroups:
- name: Operations
tags:
- Agents
- Batches
- Benchmarks
- Conversations
- DatasetIO

File diff suppressed because it is too large


@@ -242,15 +242,6 @@ const sidebars: SidebarsConfig = {
'providers/eval/remote_nvidia'
],
},
{
type: 'category',
label: 'Telemetry',
collapsed: true,
items: [
'providers/telemetry/index',
'providers/telemetry/inline_meta-reference'
],
},
{
type: 'category',
label: 'Batches',


@@ -1414,6 +1414,193 @@
"deprecated": true
}
},
"/v1/openai/v1/batches": {
"get": {
"responses": {
"200": {
"description": "A list of batch objects.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBatchesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "List all batches for the current user.",
"description": "List all batches for the current user.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "A cursor for pagination; returns batches after this batch ID.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "Number of batches to return (default 20, max 100).",
"required": true,
"schema": {
"type": "integer"
}
}
],
"deprecated": true
},
"post": {
"responses": {
"200": {
"description": "The created batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Create a new batch for processing multiple API requests.",
"description": "Create a new batch for processing multiple API requests.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateBatchRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/openai/v1/batches/{batch_id}": {
"get": {
"responses": {
"200": {
"description": "The batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Retrieve information about a specific batch.",
"description": "Retrieve information about a specific batch.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
},
"/v1/openai/v1/batches/{batch_id}/cancel": {
"post": {
"responses": {
"200": {
"description": "The updated batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Cancel a batch that is in progress.",
"description": "Cancel a batch that is in progress.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
},
"/v1/openai/v1/chat/completions": {
"get": {
"responses": {
@@ -6401,6 +6588,451 @@
"title": "Job",
"description": "A job execution instance with status tracking."
},
"ListBatchesResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
}
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"has_more"
],
"title": "ListBatchesResponse",
"description": "Response containing a list of batch objects."
},
"CreateBatchRequest": {
"type": "object",
"properties": {
"input_file_id": {
"type": "string",
"description": "The ID of an uploaded file containing requests for the batch."
},
"endpoint": {
"type": "string",
"description": "The endpoint to be used for all requests in the batch."
},
"completion_window": {
"type": "string",
"const": "24h",
"description": "The time window within which the batch should be processed."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional metadata for the batch."
},
"idempotency_key": {
"type": "string",
"description": "Optional idempotency key. When provided, enables idempotent behavior."
}
},
"additionalProperties": false,
"required": [
"input_file_id",
"endpoint",
"completion_window"
],
"title": "CreateBatchRequest"
},
"Batch": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
},
"Order": {
"type": "string",
"enum": [
@@ -13505,6 +14137,11 @@
"description": "APIs for creating and interacting with agentic systems.\n\n## Deprecated APIs\n\n> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.\n\n### Migration Guidance\n\nIf you are using deprecated versions of the Agents or Responses APIs, please migrate to:\n\n- **Responses API**: Use the stable v1 Responses API endpoints\n",
"x-displayName": "Agents"
},
{
"name": "Batches",
"description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
"x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
},
{
"name": "Benchmarks",
"description": ""
@@ -13555,6 +14192,7 @@
"name": "Operations",
"tags": [
"Agents",
"Batches",
"Benchmarks",
"DatasetIO",
"Datasets",


@@ -1012,6 +1012,141 @@ paths:
schema:
type: string
deprecated: true
/v1/openai/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
required: true
schema:
type: integer
deprecated: true
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: true
/v1/openai/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: true
/v1/openai/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: true
/v1/openai/v1/chat/completions:
get:
responses:
@@ -4736,6 +4871,331 @@ components:
title: Job
description: >-
A job execution instance with status tracking.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
Order:
type: string
enum:
@@ -10263,6 +10723,19 @@ tags:
- **Responses API**: Use the stable v1 Responses API endpoints
x-displayName: Agents
- name: Batches
description: >-
The API is designed to allow use of openai client libraries for seamless integration.
This API provides the following extensions:
- idempotent batch creation
Note: This API is currently under active development and may undergo changes.
x-displayName: >-
The Batches API enables efficient processing of multiple requests in a single
operation, particularly useful for processing large datasets, batch evaluation
workflows, and cost-effective inference at scale.
- name: Benchmarks
description: ''
- name: DatasetIO
@@ -10308,6 +10781,7 @@ x-tagGroups:
- name: Operations
tags:
- Agents
- Batches
- Benchmarks
- DatasetIO
- Datasets


@@ -40,6 +40,193 @@
}
],
"paths": {
"/v1/batches": {
"get": {
"responses": {
"200": {
"description": "A list of batch objects.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBatchesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "List all batches for the current user.",
"description": "List all batches for the current user.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "A cursor for pagination; returns batches after this batch ID.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "Number of batches to return (default 20, max 100).",
"required": true,
"schema": {
"type": "integer"
}
}
],
"deprecated": false
},
"post": {
"responses": {
"200": {
"description": "The created batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Create a new batch for processing multiple API requests.",
"description": "Create a new batch for processing multiple API requests.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateBatchRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1/batches/{batch_id}": {
"get": {
"responses": {
"200": {
"description": "The batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Retrieve information about a specific batch.",
"description": "Retrieve information about a specific batch.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/batches/{batch_id}/cancel": {
"post": {
"responses": {
"200": {
"description": "The updated batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Cancel a batch that is in progress.",
"description": "Cancel a batch that is in progress.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/chat/completions": {
"get": {
"responses": {
@@ -4005,6 +4192,451 @@
"title": "Error",
"description": "Error response from the API. Roughly follows RFC 7807."
},
"ListBatchesResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
}
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"has_more"
],
"title": "ListBatchesResponse",
"description": "Response containing a list of batch objects."
},
"CreateBatchRequest": {
"type": "object",
"properties": {
"input_file_id": {
"type": "string",
"description": "The ID of an uploaded file containing requests for the batch."
},
"endpoint": {
"type": "string",
"description": "The endpoint to be used for all requests in the batch."
},
"completion_window": {
"type": "string",
"const": "24h",
"description": "The time window within which the batch should be processed."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional metadata for the batch."
},
"idempotency_key": {
"type": "string",
"description": "Optional idempotency key. When provided, enables idempotent behavior."
}
},
"additionalProperties": false,
"required": [
"input_file_id",
"endpoint",
"completion_window"
],
"title": "CreateBatchRequest"
},
"Batch": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
},
"Order": {
"type": "string",
"enum": [
@ -11897,6 +12529,10 @@
"$ref": "#/components/schemas/InterleavedContent",
"description": "The content of the chunk, which can be interleaved text, images, or other types."
},
"chunk_id": {
"type": "string",
"description": "Unique identifier for the chunk. Must be provided explicitly."
},
"metadata": {
"type": "object",
"additionalProperties": {
@ -11930,10 +12566,6 @@
},
"description": "Optional embedding for the chunk. If not provided, it will be computed later."
},
"stored_chunk_id": {
"type": "string",
"description": "The chunk ID that is stored in the vector database. Used for backend functionality."
},
"chunk_metadata": {
"$ref": "#/components/schemas/ChunkMetadata",
"description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."
@ -11942,6 +12574,7 @@
"additionalProperties": false,
"required": [
"content",
"chunk_id",
"metadata"
],
"title": "Chunk",
@ -13288,6 +13921,11 @@
"description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n - Supports dynamic `vector_store_ids` per call\n - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`",
"x-displayName": "Agents"
},
{
"name": "Batches",
"description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
"x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
},
{
"name": "Conversations",
"description": "Protocol for conversation management operations.",
@ -13361,6 +13999,7 @@
"name": "Operations",
"tags": [
"Agents",
"Batches",
"Conversations",
"Files",
"Inference",


@ -12,6 +12,141 @@ info:
servers:
- url: http://any-hosted-llama-stack.com
paths:
/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
required: true
schema:
type: integer
deprecated: false
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: false
/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: false
/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: false
/v1/chat/completions:
get:
responses:
@ -2999,6 +3134,331 @@ components:
title: Error
description: >-
Error response from the API. Roughly follows RFC 7807.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
Order:
type: string
enum:
@ -9045,6 +9505,10 @@ components:
description: >-
The content of the chunk, which can be interleaved text, images, or other
types.
chunk_id:
type: string
description: >-
Unique identifier for the chunk. Must be provided explicitly.
metadata:
type: object
additionalProperties:
@ -9065,10 +9529,6 @@ components:
description: >-
Optional embedding for the chunk. If not provided, it will be computed
later.
stored_chunk_id:
type: string
description: >-
The chunk ID that is stored in the vector database. Used for backend functionality.
chunk_metadata:
$ref: '#/components/schemas/ChunkMetadata'
description: >-
@ -9077,6 +9537,7 @@ components:
additionalProperties: false
required:
- content
- chunk_id
- metadata
title: Chunk
description: >-
@ -10143,6 +10604,19 @@ tags:
- `background`
x-displayName: Agents
- name: Batches
description: >-
The API is designed to allow use of openai client libraries for seamless integration.
This API provides the following extensions:
- idempotent batch creation
Note: This API is currently under active development and may undergo changes.
x-displayName: >-
The Batches API enables efficient processing of multiple requests in a single
operation, particularly useful for processing large datasets, batch evaluation
workflows, and cost-effective inference at scale.
- name: Conversations
description: >-
Protocol for conversation management operations.
@ -10205,6 +10679,7 @@ x-tagGroups:
- name: Operations
tags:
- Agents
- Batches
- Conversations
- Files
- Inference


@ -40,6 +40,193 @@
}
],
"paths": {
"/v1/batches": {
"get": {
"responses": {
"200": {
"description": "A list of batch objects.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBatchesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "List all batches for the current user.",
"description": "List all batches for the current user.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "A cursor for pagination; returns batches after this batch ID.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "Number of batches to return (default 20, max 100).",
"required": true,
"schema": {
"type": "integer"
}
}
],
"deprecated": false
},
"post": {
"responses": {
"200": {
"description": "The created batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Create a new batch for processing multiple API requests.",
"description": "Create a new batch for processing multiple API requests.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateBatchRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1/batches/{batch_id}": {
"get": {
"responses": {
"200": {
"description": "The batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Retrieve information about a specific batch.",
"description": "Retrieve information about a specific batch.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/batches/{batch_id}/cancel": {
"post": {
"responses": {
"200": {
"description": "The updated batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Cancel a batch that is in progress.",
"description": "Cancel a batch that is in progress.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/chat/completions": {
"get": {
"responses": {
@ -5677,6 +5864,451 @@
"title": "Error",
"description": "Error response from the API. Roughly follows RFC 7807."
},
"ListBatchesResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
}
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"has_more"
],
"title": "ListBatchesResponse",
"description": "Response containing a list of batch objects."
},
"CreateBatchRequest": {
"type": "object",
"properties": {
"input_file_id": {
"type": "string",
"description": "The ID of an uploaded file containing requests for the batch."
},
"endpoint": {
"type": "string",
"description": "The endpoint to be used for all requests in the batch."
},
"completion_window": {
"type": "string",
"const": "24h",
"description": "The time window within which the batch should be processed."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional metadata for the batch."
},
"idempotency_key": {
"type": "string",
"description": "Optional idempotency key. When provided, enables idempotent behavior."
}
},
"additionalProperties": false,
"required": [
"input_file_id",
"endpoint",
"completion_window"
],
"title": "CreateBatchRequest"
},
"Batch": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
},
"Order": {
"type": "string",
"enum": [
@ -13569,6 +14201,10 @@
"$ref": "#/components/schemas/InterleavedContent",
"description": "The content of the chunk, which can be interleaved text, images, or other types."
},
"chunk_id": {
"type": "string",
"description": "Unique identifier for the chunk. Must be provided explicitly."
},
"metadata": {
"type": "object",
"additionalProperties": {
@ -13602,10 +14238,6 @@
},
"description": "Optional embedding for the chunk. If not provided, it will be computed later."
},
"stored_chunk_id": {
"type": "string",
"description": "The chunk ID that is stored in the vector database. Used for backend functionality."
},
"chunk_metadata": {
"$ref": "#/components/schemas/ChunkMetadata",
"description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."
@ -13614,6 +14246,7 @@
"additionalProperties": false,
"required": [
"content",
"chunk_id",
"metadata"
],
"title": "Chunk",
@ -17960,6 +18593,11 @@
"description": "APIs for creating and interacting with agentic systems.",
"x-displayName": "Agents"
},
{
"name": "Batches",
"description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
"x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
},
{
"name": "Benchmarks",
"description": ""
@ -18054,6 +18692,7 @@
"name": "Operations",
"tags": [
"Agents",
"Batches",
"Benchmarks",
"Conversations",
"DatasetIO",


@ -15,6 +15,141 @@ info:
servers:
- url: http://any-hosted-llama-stack.com
paths:
/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
required: true
schema:
type: integer
deprecated: false
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: false
/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: false
/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: false
/v1/chat/completions:
get:
responses:
@ -4212,6 +4347,331 @@ components:
title: Error
description: >-
Error response from the API. Roughly follows RFC 7807.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
Order:
type: string
enum:
@ -10258,6 +10718,10 @@ components:
description: >-
The content of the chunk, which can be interleaved text, images, or other
types.
chunk_id:
type: string
description: >-
Unique identifier for the chunk. Must be provided explicitly.
metadata:
type: object
additionalProperties:
@ -10278,10 +10742,6 @@ components:
description: >-
Optional embedding for the chunk. If not provided, it will be computed
later.
stored_chunk_id:
type: string
description: >-
The chunk ID that is stored in the vector database. Used for backend functionality.
chunk_metadata:
$ref: '#/components/schemas/ChunkMetadata'
description: >-
@ -10290,6 +10750,7 @@ components:
additionalProperties: false
required:
- content
- chunk_id
- metadata
title: Chunk
description: >-
@ -13527,6 +13988,19 @@ tags:
description: >-
APIs for creating and interacting with agentic systems.
x-displayName: Agents
- name: Batches
description: >-
The API is designed to allow use of openai client libraries for seamless integration.
This API provides the following extensions:
- idempotent batch creation
Note: This API is currently under active development and may undergo changes.
x-displayName: >-
The Batches API enables efficient processing of multiple requests in a single
operation, particularly useful for processing large datasets, batch evaluation
workflows, and cost-effective inference at scale.
- name: Benchmarks
description: ''
- name: Conversations
@ -13601,6 +14075,7 @@ x-tagGroups:
- name: Operations
tags:
- Agents
- Batches
- Benchmarks
- Conversations
- DatasetIO


@ -285,7 +285,6 @@ exclude = [
"^src/llama_stack/models/llama/llama3/interface\\.py$",
"^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
"^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
"^src/llama_stack/providers/inline/agents/meta_reference/",
"^src/llama_stack/providers/inline/datasetio/localfs/",
"^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
"^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",


@ -313,8 +313,20 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
fi
echo "Using image: $IMAGE_NAME"
docker run -d --network host --name "$container_name" \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
# On macOS/Darwin, --network host doesn't work as expected due to Docker running in a VM
# Use regular port mapping instead
NETWORK_MODE=""
PORT_MAPPINGS=""
if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
NETWORK_MODE="--network host"
else
# On non-Linux (macOS, Windows), need explicit port mappings for both app and telemetry
PORT_MAPPINGS="-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT -p $COLLECTOR_PORT:$COLLECTOR_PORT"
echo "Using bridge networking with port mapping (non-Linux)"
fi
docker run -d $NETWORK_MODE --name "$container_name" \
$PORT_MAPPINGS \
$DOCKER_ENV_VARS \
"$IMAGE_NAME" \
--port $LLAMA_STACK_PORT


@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from collections.abc import Sequence
from typing import Annotated, Any, Literal
from pydantic import BaseModel, Field, model_validator
@ -202,7 +203,7 @@ class OpenAIResponseMessage(BaseModel):
scenarios.
"""
content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]
content: str | Sequence[OpenAIResponseInputMessageContent] | Sequence[OpenAIResponseOutputMessageContent]
role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
type: Literal["message"] = "message"
@ -254,10 +255,10 @@ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
"""
id: str
queries: list[str]
queries: Sequence[str]
status: str
type: Literal["file_search_call"] = "file_search_call"
results: list[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
results: Sequence[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
@json_schema_type
@ -597,7 +598,7 @@ class OpenAIResponseObject(BaseModel):
id: str
model: str
object: Literal["response"] = "response"
output: list[OpenAIResponseOutput]
output: Sequence[OpenAIResponseOutput]
parallel_tool_calls: bool = False
previous_response_id: str | None = None
prompt: OpenAIResponsePrompt | None = None
@ -607,7 +608,7 @@ class OpenAIResponseObject(BaseModel):
# before the field was added. New responses will have this set always.
text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
top_p: float | None = None
tools: list[OpenAIResponseTool] | None = None
tools: Sequence[OpenAIResponseTool] | None = None
truncation: str | None = None
usage: OpenAIResponseUsage | None = None
instructions: str | None = None
@ -1315,7 +1316,7 @@ class ListOpenAIResponseInputItem(BaseModel):
:param object: Object type identifier, always "list"
"""
data: list[OpenAIResponseInput]
data: Sequence[OpenAIResponseInput]
object: Literal["list"] = "list"
@ -1326,7 +1327,7 @@ class OpenAIResponseObjectWithInput(OpenAIResponseObject):
:param input: List of input items that led to this response
"""
input: list[OpenAIResponseInput]
input: Sequence[OpenAIResponseInput]
def to_response_object(self) -> OpenAIResponseObject:
"""Convert to OpenAIResponseObject by excluding input field."""
@ -1344,7 +1345,7 @@ class ListOpenAIResponseObject(BaseModel):
:param object: Object type identifier, always "list"
"""
data: list[OpenAIResponseObjectWithInput]
data: Sequence[OpenAIResponseObjectWithInput]
has_more: bool
first_id: str
last_id: str
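The `list[...]` to `Sequence[...]` changes above (and the `# Cast needed due to list invariance` notes in the agent code later in this commit) both stem from variance rules: mypy treats `list` as invariant, while the read-only `Sequence` is covariant. A minimal, self-contained sketch with stand-in types (not the real message classes) to illustrate why the narrower field types now type-check:

```python
from collections.abc import Sequence


class Message: ...


class UserMessage(Message): ...


def takes_list(msgs: list[Message]) -> None: ...


def takes_seq(msgs: Sequence[Message]) -> None: ...


user_msgs: list[UserMessage] = [UserMessage()]

takes_seq(user_msgs)   # OK: Sequence is covariant, so Sequence[Message] accepts list[UserMessage]
takes_list(user_msgs)  # mypy error: list is invariant, hence the casts/ignores elsewhere
```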


@ -8,7 +8,6 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import uuid
from typing import Annotated, Any, Literal, Protocol, runtime_checkable
from fastapi import Body
@ -18,7 +17,6 @@ from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.core.telemetry.trace_protocol import trace_protocol
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
from llama_stack.schema_utils import json_schema_type, webmethod
from llama_stack.strong_typing.schema import register_schema
@ -61,38 +59,19 @@ class Chunk(BaseModel):
"""
A chunk of content that can be inserted into a vector database.
:param content: The content of the chunk, which can be interleaved text, images, or other types.
:param embedding: Optional embedding for the chunk. If not provided, it will be computed later.
:param chunk_id: Unique identifier for the chunk. Must be provided explicitly.
:param metadata: Metadata associated with the chunk that will be used in the model context during inference.
:param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality.
:param embedding: Optional embedding for the chunk. If not provided, it will be computed later.
:param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference.
The `chunk_metadata` is required for backend functionality.
"""
content: InterleavedContent
chunk_id: str
metadata: dict[str, Any] = Field(default_factory=dict)
embedding: list[float] | None = None
# The alias parameter serializes the field as "chunk_id" in JSON but keeps the internal name as "stored_chunk_id"
stored_chunk_id: str | None = Field(default=None, alias="chunk_id")
chunk_metadata: ChunkMetadata | None = None
model_config = {"populate_by_name": True}
def model_post_init(self, __context):
# Extract chunk_id from metadata if present
if self.metadata and "chunk_id" in self.metadata:
self.stored_chunk_id = self.metadata.pop("chunk_id")
@property
def chunk_id(self) -> str:
"""Returns the chunk ID, which is either an input `chunk_id` or a generated one if not set."""
if self.stored_chunk_id:
return self.stored_chunk_id
if "document_id" in self.metadata:
return generate_chunk_id(self.metadata["document_id"], str(self.content))
return generate_chunk_id(str(uuid.uuid4()), str(self.content))
@property
def document_id(self) -> str | None:
"""Returns the document_id from either metadata or chunk_metadata, with metadata taking precedence."""


@ -13,6 +13,8 @@ from llama_stack.core.datatypes import (
ModelWithOwner,
RegistryEntrySource,
)
from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger
from .common import CommonRoutingTableImpl, lookup_model
@ -42,11 +44,90 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
await self.update_registered_models(provider_id, models)
async def _get_dynamic_models_from_provider_data(self) -> list[Model]:
"""
Fetch models from providers that have credentials in the current request's provider_data.
This allows users to see models available to them from providers that require
per-request API keys (via X-LlamaStack-Provider-Data header).
Returns models with fully qualified identifiers (provider_id/model_id) but does NOT
cache them in the registry since they are user-specific.
"""
provider_data = PROVIDER_DATA_VAR.get()
if not provider_data:
return []
dynamic_models = []
for provider_id, provider in self.impls_by_provider_id.items():
# Check if this provider supports provider_data
if not isinstance(provider, NeedsRequestProviderData):
continue
# Check if provider has a validator (some providers like ollama don't need per-request credentials)
spec = getattr(provider, "__provider_spec__", None)
if not spec or not getattr(spec, "provider_data_validator", None):
continue
# Validate provider_data silently - we're speculatively checking all providers
# so validation failures are expected when user didn't provide keys for this provider
try:
validator = instantiate_class_type(spec.provider_data_validator)
validator(**provider_data)
except Exception:
# User didn't provide credentials for this provider - skip silently
continue
# Validation succeeded! User has credentials for this provider
# Now try to list models
try:
models = await provider.list_models()
if not models:
continue
# Ensure models have fully qualified identifiers with provider_id prefix
for model in models:
# Only add prefix if model identifier doesn't already have it
if not model.identifier.startswith(f"{provider_id}/"):
model.identifier = f"{provider_id}/{model.provider_resource_id}"
dynamic_models.append(model)
logger.debug(f"Fetched {len(models)} models from provider {provider_id} using provider_data")
except Exception as e:
logger.debug(f"Failed to list models from provider {provider_id} with provider_data: {e}")
continue
return dynamic_models
async def list_models(self) -> ListModelsResponse:
return ListModelsResponse(data=await self.get_all_with_type("model"))
# Get models from registry
registry_models = await self.get_all_with_type("model")
# Get additional models available via provider_data (user-specific, not cached)
dynamic_models = await self._get_dynamic_models_from_provider_data()
# Combine, avoiding duplicates (registry takes precedence)
registry_identifiers = {m.identifier for m in registry_models}
unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers]
return ListModelsResponse(data=registry_models + unique_dynamic_models)
async def openai_list_models(self) -> OpenAIListModelsResponse:
models = await self.get_all_with_type("model")
# Get models from registry
registry_models = await self.get_all_with_type("model")
# Get additional models available via provider_data (user-specific, not cached)
dynamic_models = await self._get_dynamic_models_from_provider_data()
# Combine, avoiding duplicates (registry takes precedence)
registry_identifiers = {m.identifier for m in registry_models}
unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers]
all_models = registry_models + unique_dynamic_models
openai_models = [
OpenAIModel(
id=model.identifier,
@ -54,7 +135,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
created=int(time.time()),
owned_by="llama_stack",
)
for model in models
for model in all_models
]
return OpenAIListModelsResponse(data=openai_models)
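The routing-table change above lets a caller who supplies per-request provider credentials in the `X-LlamaStack-Provider-Data` header see that provider's models alongside registry models, without anything being cached. A minimal sketch of exercising it over HTTP; the server URL is a placeholder and `openai_api_key` is an assumed validator field name (the exact keys depend on the provider's data validator).

```python
# Sketch only: header payload keys are provider-specific; this one is assumed.
import json

import httpx

provider_data = {"openai_api_key": "sk-placeholder"}

resp = httpx.get(
    "http://localhost:8321/v1/models",
    headers={"X-LlamaStack-Provider-Data": json.dumps(provider_data)},
)
resp.raise_for_status()

# Registry models plus any provider models unlocked by the per-request key,
# qualified as "<provider_id>/<model_id>".
for model in resp.json()["data"]:
    print(model["id"])
```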


@ -14,6 +14,7 @@ from typing import Any
import yaml
from llama_stack.apis.agents import Agents
from llama_stack.apis.batches import Batches
from llama_stack.apis.benchmarks import Benchmarks
from llama_stack.apis.conversations import Conversations
from llama_stack.apis.datasetio import DatasetIO
@ -63,6 +64,7 @@ class LlamaStack(
Providers,
Inference,
Agents,
Batches,
Safety,
SyntheticDataGeneration,
Datasets,


@ -11,6 +11,7 @@ import uuid
import warnings
from collections.abc import AsyncGenerator
from datetime import UTC, datetime
from typing import Any, cast
import httpx
@ -125,12 +126,12 @@ class ChatAgent(ShieldRunnerMixin):
)
def turn_to_messages(self, turn: Turn) -> list[Message]:
messages = []
messages: list[Message] = []
# NOTE: if a tool call response is in a step, we do not add it when processing the input messages
tool_call_ids = set()
for step in turn.steps:
if step.step_type == StepType.tool_execution.value:
if step.step_type == StepType.tool_execution.value and isinstance(step, ToolExecutionStep):
for response in step.tool_responses:
tool_call_ids.add(response.call_id)
@ -149,9 +150,9 @@ class ChatAgent(ShieldRunnerMixin):
messages.append(msg)
for step in turn.steps:
if step.step_type == StepType.inference.value:
if step.step_type == StepType.inference.value and isinstance(step, InferenceStep):
messages.append(step.model_response)
elif step.step_type == StepType.tool_execution.value:
elif step.step_type == StepType.tool_execution.value and isinstance(step, ToolExecutionStep):
for response in step.tool_responses:
messages.append(
ToolResponseMessage(
@ -159,8 +160,8 @@ class ChatAgent(ShieldRunnerMixin):
content=response.content,
)
)
elif step.step_type == StepType.shield_call.value:
if step.violation:
elif step.step_type == StepType.shield_call.value and isinstance(step, ShieldCallStep):
if step.violation and step.violation.user_message:
# CompletionMessage itself in the ShieldResponse
messages.append(
CompletionMessage(
@ -174,7 +175,7 @@ class ChatAgent(ShieldRunnerMixin):
return await self.storage.create_session(name)
async def get_messages_from_turns(self, turns: list[Turn]) -> list[Message]:
messages = []
messages: list[Message] = []
if self.agent_config.instructions != "":
messages.append(SystemMessage(content=self.agent_config.instructions))
@ -231,7 +232,9 @@ class ChatAgent(ShieldRunnerMixin):
steps = []
messages = await self.get_messages_from_turns(turns)
if is_resume:
assert isinstance(request, AgentTurnResumeRequest)
tool_response_messages = [
ToolResponseMessage(call_id=x.call_id, content=x.content) for x in request.tool_responses
]
@ -252,42 +255,52 @@ class ChatAgent(ShieldRunnerMixin):
in_progress_tool_call_step = await self.storage.get_in_progress_tool_call_step(
request.session_id, request.turn_id
)
now = datetime.now(UTC).isoformat()
now_dt = datetime.now(UTC)
tool_execution_step = ToolExecutionStep(
step_id=(in_progress_tool_call_step.step_id if in_progress_tool_call_step else str(uuid.uuid4())),
turn_id=request.turn_id,
tool_calls=(in_progress_tool_call_step.tool_calls if in_progress_tool_call_step else []),
tool_responses=request.tool_responses,
completed_at=now,
started_at=(in_progress_tool_call_step.started_at if in_progress_tool_call_step else now),
completed_at=now_dt,
started_at=(in_progress_tool_call_step.started_at if in_progress_tool_call_step else now_dt),
)
steps.append(tool_execution_step)
yield AgentTurnResponseStreamChunk(
event=AgentTurnResponseEvent(
payload=AgentTurnResponseStepCompletePayload(
step_type=StepType.tool_execution.value,
step_type=StepType.tool_execution,
step_id=tool_execution_step.step_id,
step_details=tool_execution_step,
)
)
)
input_messages = last_turn.input_messages
# Cast needed due to list invariance - last_turn.input_messages is the right type
input_messages = last_turn.input_messages # type: ignore[assignment]
turn_id = request.turn_id
actual_turn_id = request.turn_id
start_time = last_turn.started_at
else:
assert isinstance(request, AgentTurnCreateRequest)
messages.extend(request.messages)
start_time = datetime.now(UTC).isoformat()
input_messages = request.messages
start_time = datetime.now(UTC)
# Cast needed due to list invariance - request.messages is the right type
input_messages = request.messages # type: ignore[assignment]
# Use the generated turn_id from beginning of function
actual_turn_id = turn_id if turn_id else str(uuid.uuid4())
output_message = None
req_documents = request.documents if isinstance(request, AgentTurnCreateRequest) and not is_resume else None
req_sampling = (
self.agent_config.sampling_params if self.agent_config.sampling_params is not None else SamplingParams()
)
async for chunk in self.run(
session_id=request.session_id,
turn_id=turn_id,
turn_id=actual_turn_id,
input_messages=messages,
sampling_params=self.agent_config.sampling_params,
sampling_params=req_sampling,
stream=request.stream,
documents=request.documents if not is_resume else None,
documents=req_documents,
):
if isinstance(chunk, CompletionMessage):
output_message = chunk
@ -295,20 +308,23 @@ class ChatAgent(ShieldRunnerMixin):
assert isinstance(chunk, AgentTurnResponseStreamChunk), f"Unexpected type {type(chunk)}"
event = chunk.event
if event.payload.event_type == AgentTurnResponseEventType.step_complete.value:
steps.append(event.payload.step_details)
if event.payload.event_type == AgentTurnResponseEventType.step_complete.value and hasattr(
event.payload, "step_details"
):
step_details = event.payload.step_details
steps.append(step_details)
yield chunk
assert output_message is not None
turn = Turn(
turn_id=turn_id,
turn_id=actual_turn_id,
session_id=request.session_id,
input_messages=input_messages,
input_messages=input_messages, # type: ignore[arg-type]
output_message=output_message,
started_at=start_time,
completed_at=datetime.now(UTC).isoformat(),
completed_at=datetime.now(UTC),
steps=steps,
)
await self.storage.add_turn_to_session(request.session_id, turn)
@ -345,9 +361,9 @@ class ChatAgent(ShieldRunnerMixin):
# return a "final value" for the `yield from` statement. we simulate that by yielding a
# final boolean (to see whether an exception happened) and then explicitly testing for it.
if len(self.input_shields) > 0:
if self.input_shields:
async for res in self.run_multiple_shields_wrapper(
turn_id, input_messages, self.input_shields, "user-input"
turn_id, cast(list[OpenAIMessageParam], input_messages), self.input_shields, "user-input"
):
if isinstance(res, bool):
return
@ -374,9 +390,9 @@ class ChatAgent(ShieldRunnerMixin):
# for output shields run on the full input and output combination
messages = input_messages + [final_response]
if len(self.output_shields) > 0:
if self.output_shields:
async for res in self.run_multiple_shields_wrapper(
turn_id, messages, self.output_shields, "assistant-output"
turn_id, cast(list[OpenAIMessageParam], messages), self.output_shields, "assistant-output"
):
if isinstance(res, bool):
return
@ -388,7 +404,7 @@ class ChatAgent(ShieldRunnerMixin):
async def run_multiple_shields_wrapper(
self,
turn_id: str,
messages: list[Message],
messages: list[OpenAIMessageParam],
shields: list[str],
touchpoint: str,
) -> AsyncGenerator:
@ -402,12 +418,12 @@ class ChatAgent(ShieldRunnerMixin):
return
step_id = str(uuid.uuid4())
shield_call_start_time = datetime.now(UTC).isoformat()
shield_call_start_time = datetime.now(UTC)
try:
yield AgentTurnResponseStreamChunk(
event=AgentTurnResponseEvent(
payload=AgentTurnResponseStepStartPayload(
step_type=StepType.shield_call.value,
step_type=StepType.shield_call,
step_id=step_id,
metadata=dict(touchpoint=touchpoint),
)
@ -419,14 +435,14 @@ class ChatAgent(ShieldRunnerMixin):
yield AgentTurnResponseStreamChunk(
event=AgentTurnResponseEvent(
payload=AgentTurnResponseStepCompletePayload(
step_type=StepType.shield_call.value,
step_type=StepType.shield_call,
step_id=step_id,
step_details=ShieldCallStep(
step_id=step_id,
turn_id=turn_id,
violation=e.violation,
started_at=shield_call_start_time,
completed_at=datetime.now(UTC).isoformat(),
completed_at=datetime.now(UTC),
),
)
)
@ -443,14 +459,14 @@ class ChatAgent(ShieldRunnerMixin):
yield AgentTurnResponseStreamChunk(
event=AgentTurnResponseEvent(
payload=AgentTurnResponseStepCompletePayload(
step_type=StepType.shield_call.value,
step_type=StepType.shield_call,
step_id=step_id,
step_details=ShieldCallStep(
step_id=step_id,
turn_id=turn_id,
violation=None,
started_at=shield_call_start_time,
completed_at=datetime.now(UTC).isoformat(),
completed_at=datetime.now(UTC),
),
)
)
@ -496,21 +512,22 @@ class ChatAgent(ShieldRunnerMixin):
else:
self.tool_name_to_args[tool_name]["vector_store_ids"].append(session_info.vector_store_id)
output_attachments = []
output_attachments: list[Attachment] = []
n_iter = await self.storage.get_num_infer_iters_in_turn(session_id, turn_id) or 0
# Build a map of custom tools to their definitions for faster lookup
client_tools = {}
if self.agent_config.client_tools:
for tool in self.agent_config.client_tools:
client_tools[tool.name] = tool
while True:
step_id = str(uuid.uuid4())
inference_start_time = datetime.now(UTC).isoformat()
inference_start_time = datetime.now(UTC)
yield AgentTurnResponseStreamChunk(
event=AgentTurnResponseEvent(
payload=AgentTurnResponseStepStartPayload(
step_type=StepType.inference.value,
step_type=StepType.inference,
step_id=step_id,
)
)
@ -538,7 +555,7 @@ class ChatAgent(ShieldRunnerMixin):
else:
return value
def _add_type(openai_msg: dict) -> OpenAIMessageParam:
def _add_type(openai_msg: Any) -> OpenAIMessageParam:
# Serialize any nested Pydantic models to plain dicts
openai_msg = _serialize_nested(openai_msg)
@ -588,7 +605,7 @@ class ChatAgent(ShieldRunnerMixin):
messages=openai_messages,
tools=openai_tools if openai_tools else None,
tool_choice=tool_choice,
response_format=self.agent_config.response_format,
response_format=self.agent_config.response_format, # type: ignore[arg-type]
temperature=temperature,
top_p=top_p,
max_tokens=max_tokens,
@ -598,7 +615,8 @@ class ChatAgent(ShieldRunnerMixin):
# Convert OpenAI stream back to Llama Stack format
response_stream = convert_openai_chat_completion_stream(
openai_stream, enable_incremental_tool_calls=True
openai_stream, # type: ignore[arg-type]
enable_incremental_tool_calls=True,
)
async for chunk in response_stream:
@ -620,7 +638,7 @@ class ChatAgent(ShieldRunnerMixin):
yield AgentTurnResponseStreamChunk(
event=AgentTurnResponseEvent(
payload=AgentTurnResponseStepProgressPayload(
step_type=StepType.inference.value,
step_type=StepType.inference,
step_id=step_id,
delta=delta,
)
@ -633,7 +651,7 @@ class ChatAgent(ShieldRunnerMixin):
yield AgentTurnResponseStreamChunk(
event=AgentTurnResponseEvent(
payload=AgentTurnResponseStepProgressPayload(
step_type=StepType.inference.value,
step_type=StepType.inference,
step_id=step_id,
delta=delta,
)
@ -651,7 +669,9 @@ class ChatAgent(ShieldRunnerMixin):
output_attr = json.dumps(
{
"content": content,
"tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls],
"tool_calls": [
json.loads(t.model_dump_json()) for t in tool_calls if isinstance(t, ToolCall)
],
}
)
span.set_attribute("output", output_attr)
@ -667,16 +687,18 @@ class ChatAgent(ShieldRunnerMixin):
if tool_calls:
content = ""
# Filter out string tool calls for CompletionMessage (only keep ToolCall objects)
valid_tool_calls = [t for t in tool_calls if isinstance(t, ToolCall)]
message = CompletionMessage(
content=content,
stop_reason=stop_reason,
tool_calls=tool_calls,
tool_calls=valid_tool_calls if valid_tool_calls else None,
)
yield AgentTurnResponseStreamChunk(
event=AgentTurnResponseEvent(
payload=AgentTurnResponseStepCompletePayload(
step_type=StepType.inference.value,
step_type=StepType.inference,
step_id=step_id,
step_details=InferenceStep(
# somewhere deep, we are re-assigning message or closing over some
@ -686,13 +708,14 @@ class ChatAgent(ShieldRunnerMixin):
turn_id=turn_id,
model_response=copy.deepcopy(message),
started_at=inference_start_time,
completed_at=datetime.now(UTC).isoformat(),
completed_at=datetime.now(UTC),
),
)
)
)
if n_iter >= self.agent_config.max_infer_iters:
max_iters = self.agent_config.max_infer_iters if self.agent_config.max_infer_iters is not None else 10
if n_iter >= max_iters:
logger.info(f"done with MAX iterations ({n_iter}), exiting.")
# NOTE: mark end_of_turn to indicate to client that we are done with the turn
# Do not continue the tool call loop after this point
@ -705,14 +728,16 @@ class ChatAgent(ShieldRunnerMixin):
yield message
break
if len(message.tool_calls) == 0:
if not message.tool_calls or len(message.tool_calls) == 0:
if stop_reason == StopReason.end_of_turn:
# TODO: UPDATE RETURN TYPE TO SEND A TUPLE OF (MESSAGE, ATTACHMENTS)
if len(output_attachments) > 0:
if isinstance(message.content, list):
message.content += output_attachments
# List invariance - attachments are compatible at runtime
message.content += output_attachments # type: ignore[arg-type]
else:
message.content = [message.content] + output_attachments
# List invariance - attachments are compatible at runtime
message.content = [message.content] + output_attachments # type: ignore[assignment]
yield message
else:
logger.debug(f"completion message with EOM (iter: {n_iter}): {str(message)}")
@ -725,6 +750,7 @@ class ChatAgent(ShieldRunnerMixin):
non_client_tool_calls = []
# Separate client and non-client tool calls
if message.tool_calls:
for tool_call in message.tool_calls:
if tool_call.tool_name in client_tools:
client_tool_calls.append(tool_call)
@ -737,7 +763,7 @@ class ChatAgent(ShieldRunnerMixin):
yield AgentTurnResponseStreamChunk(
event=AgentTurnResponseEvent(
payload=AgentTurnResponseStepStartPayload(
step_type=StepType.tool_execution.value,
step_type=StepType.tool_execution,
step_id=step_id,
)
)
@ -746,7 +772,7 @@ class ChatAgent(ShieldRunnerMixin):
yield AgentTurnResponseStreamChunk(
event=AgentTurnResponseEvent(
payload=AgentTurnResponseStepProgressPayload(
step_type=StepType.tool_execution.value,
step_type=StepType.tool_execution,
step_id=step_id,
delta=ToolCallDelta(
parse_status=ToolCallParseStatus.in_progress,
@ -766,7 +792,7 @@ class ChatAgent(ShieldRunnerMixin):
if self.telemetry_enabled
else {},
) as span:
tool_execution_start_time = datetime.now(UTC).isoformat()
tool_execution_start_time = datetime.now(UTC)
tool_result = await self.execute_tool_call_maybe(
session_id,
tool_call,
@ -796,14 +822,14 @@ class ChatAgent(ShieldRunnerMixin):
)
],
started_at=tool_execution_start_time,
completed_at=datetime.now(UTC).isoformat(),
completed_at=datetime.now(UTC),
)
# Yield the step completion event
yield AgentTurnResponseStreamChunk(
event=AgentTurnResponseEvent(
payload=AgentTurnResponseStepCompletePayload(
step_type=StepType.tool_execution.value,
step_type=StepType.tool_execution,
step_id=step_id,
step_details=tool_execution_step,
)
@ -833,7 +859,7 @@ class ChatAgent(ShieldRunnerMixin):
turn_id=turn_id,
tool_calls=client_tool_calls,
tool_responses=[],
started_at=datetime.now(UTC).isoformat(),
started_at=datetime.now(UTC),
),
)
@ -868,9 +894,10 @@ class ChatAgent(ShieldRunnerMixin):
toolgroup_to_args = toolgroup_to_args or {}
tool_name_to_def = {}
tool_name_to_def: dict[str, ToolDefinition] = {}
tool_name_to_args = {}
if self.agent_config.client_tools:
for tool_def in self.agent_config.client_tools:
if tool_name_to_def.get(tool_def.name, None):
raise ValueError(f"Tool {tool_def.name} already exists")
@ -908,15 +935,17 @@ class ChatAgent(ShieldRunnerMixin):
else:
identifier = None
if tool_name_to_def.get(identifier, None):
raise ValueError(f"Tool {identifier} already exists")
if identifier:
tool_name_to_def[identifier] = ToolDefinition(
tool_name=identifier,
# Convert BuiltinTool to string for dictionary key
identifier_str = identifier.value if isinstance(identifier, BuiltinTool) else identifier
if tool_name_to_def.get(identifier_str, None):
raise ValueError(f"Tool {identifier_str} already exists")
tool_name_to_def[identifier_str] = ToolDefinition(
tool_name=identifier_str,
description=tool_def.description,
input_schema=tool_def.input_schema,
)
tool_name_to_args[identifier] = toolgroup_to_args.get(toolgroup_name, {})
tool_name_to_args[identifier_str] = toolgroup_to_args.get(toolgroup_name, {})
self.tool_defs, self.tool_name_to_args = (
list(tool_name_to_def.values()),
@ -966,7 +995,9 @@ class ChatAgent(ShieldRunnerMixin):
except json.JSONDecodeError as e:
raise ValueError(f"Failed to parse arguments for tool call: {tool_call.arguments}") from e
result = await self.tool_runtime_api.invoke_tool(
result = cast(
ToolInvocationResult,
await self.tool_runtime_api.invoke_tool(
tool_name=tool_name_str,
kwargs={
"session_id": session_id,
@ -974,6 +1005,7 @@ class ChatAgent(ShieldRunnerMixin):
**args,
**self.tool_name_to_args.get(tool_name_str, {}),
},
),
)
logger.debug(f"tool call {tool_name_str} completed with result: {result}")
return result
@ -1017,7 +1049,7 @@ def _interpret_content_as_attachment(
snippet = match.group(1)
data = json.loads(snippet)
return Attachment(
url=URL(uri="file://" + data["filepath"]),
content=URL(uri="file://" + data["filepath"]),
mime_type=data["mimetype"],
)
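# Illustrative sketch (hypothetical Base/TextPart/ImagePart names) of the
# list-invariance issue that the union-typed annotations and the
# type: ignore[arg-type] markers above work around.
class Base: ...
class TextPart(Base): ...
class ImagePart(Base): ...

def render(parts: list[Base]) -> None:
    for part in parts:
        print(type(part).__name__)

texts: list[TextPart] = [TextPart()]
# render(texts)  # mypy error: list[TextPart] is not list[Base] because lists are invariant

mixed: list[Base] = [TextPart()]  # declare the broader element type up front
mixed.append(ImagePart())         # later appends of other members still type-check
render(mixed)                     # accepted by both mypy and the runtime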

View file

@ -21,6 +21,7 @@ from llama_stack.apis.agents import (
Document,
ListOpenAIResponseInputItem,
ListOpenAIResponseObject,
OpenAIDeleteResponseObject,
OpenAIResponseInput,
OpenAIResponseInputTool,
OpenAIResponseObject,
@ -141,7 +142,7 @@ class MetaReferenceAgentsImpl(Agents):
persistence_store=(
self.persistence_store if agent_info.enable_session_persistence else self.in_memory_store
),
created_at=agent_info.created_at,
created_at=agent_info.created_at.isoformat(),
policy=self.policy,
telemetry_enabled=self.telemetry_enabled,
)
@ -163,9 +164,9 @@ class MetaReferenceAgentsImpl(Agents):
agent_id: str,
session_id: str,
messages: list[UserMessage | ToolResponseMessage],
toolgroups: list[AgentToolGroup] | None = None,
documents: list[Document] | None = None,
stream: bool | None = False,
documents: list[Document] | None = None,
toolgroups: list[AgentToolGroup] | None = None,
tool_config: ToolConfig | None = None,
) -> AsyncGenerator:
request = AgentTurnCreateRequest(
@ -221,6 +222,8 @@ class MetaReferenceAgentsImpl(Agents):
async def get_agents_turn(self, agent_id: str, session_id: str, turn_id: str) -> Turn:
agent = await self._get_agent_impl(agent_id)
turn = await agent.storage.get_session_turn(session_id, turn_id)
if turn is None:
raise ValueError(f"Turn {turn_id} not found in session {session_id}")
return turn
async def get_agents_step(self, agent_id: str, session_id: str, turn_id: str, step_id: str) -> AgentStepResponse:
@ -232,13 +235,15 @@ class MetaReferenceAgentsImpl(Agents):
async def get_agents_session(
self,
agent_id: str,
session_id: str,
agent_id: str,
turn_ids: list[str] | None = None,
) -> Session:
agent = await self._get_agent_impl(agent_id)
session_info = await agent.storage.get_session_info(session_id)
if session_info is None:
raise ValueError(f"Session {session_id} not found")
turns = await agent.storage.get_session_turns(session_id)
if turn_ids:
turns = [turn for turn in turns if turn.turn_id in turn_ids]
@ -249,7 +254,7 @@ class MetaReferenceAgentsImpl(Agents):
started_at=session_info.started_at,
)
async def delete_agents_session(self, agent_id: str, session_id: str) -> None:
async def delete_agents_session(self, session_id: str, agent_id: str) -> None:
agent = await self._get_agent_impl(agent_id)
# Delete turns first, then the session
@ -302,7 +307,7 @@ class MetaReferenceAgentsImpl(Agents):
agent = Agent(
agent_id=agent_id,
agent_config=chat_agent.agent_config,
created_at=chat_agent.created_at,
created_at=datetime.fromisoformat(chat_agent.created_at),
)
return agent
@ -323,6 +328,7 @@ class MetaReferenceAgentsImpl(Agents):
self,
response_id: str,
) -> OpenAIResponseObject:
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
return await self.openai_responses_impl.get_openai_response(response_id)
async def create_openai_response(
@ -342,7 +348,8 @@ class MetaReferenceAgentsImpl(Agents):
max_infer_iters: int | None = 10,
guardrails: list[ResponseGuardrail] | None = None,
) -> OpenAIResponseObject:
return await self.openai_responses_impl.create_openai_response(
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
result = await self.openai_responses_impl.create_openai_response(
input,
model,
prompt,
@ -358,6 +365,7 @@ class MetaReferenceAgentsImpl(Agents):
max_infer_iters,
guardrails,
)
return result # type: ignore[no-any-return]
async def list_openai_responses(
self,
@ -366,6 +374,7 @@ class MetaReferenceAgentsImpl(Agents):
model: str | None = None,
order: Order | None = Order.desc,
) -> ListOpenAIResponseObject:
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
return await self.openai_responses_impl.list_openai_responses(after, limit, model, order)
async def list_openai_response_input_items(
@ -377,9 +386,11 @@ class MetaReferenceAgentsImpl(Agents):
limit: int | None = 20,
order: Order | None = Order.desc,
) -> ListOpenAIResponseInputItem:
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
return await self.openai_responses_impl.list_openai_response_input_items(
response_id, after, before, include, limit, order
)
async def delete_openai_response(self, response_id: str) -> None:
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
return await self.openai_responses_impl.delete_openai_response(response_id)
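# Illustrative sketch (hypothetical Impl/Service names) of the assert-based
# narrowing used for self.openai_responses_impl above: an attribute declared
# Optional and populated during initialization is narrowed to non-None before use.
import asyncio

class Impl:
    async def delete(self, response_id: str) -> None:
        print(f"deleted {response_id}")

class Service:
    def __init__(self) -> None:
        self.impl: Impl | None = None  # populated later by initialize()

    async def initialize(self) -> None:
        self.impl = Impl()

    async def delete_response(self, response_id: str) -> None:
        assert self.impl is not None, "not initialized"
        await self.impl.delete(response_id)  # mypy now treats self.impl as Impl

async def main() -> None:
    svc = Service()
    await svc.initialize()
    await svc.delete_response("resp_123")

asyncio.run(main())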

View file

@ -6,12 +6,14 @@
import json
import uuid
from dataclasses import dataclass
from datetime import UTC, datetime
from llama_stack.apis.agents import AgentConfig, Session, ToolExecutionStep, Turn
from llama_stack.apis.common.errors import SessionNotFoundError
from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
from llama_stack.core.access_control.datatypes import AccessRule
from llama_stack.core.access_control.conditions import User as ProtocolUser
from llama_stack.core.access_control.datatypes import AccessRule, Action
from llama_stack.core.datatypes import User
from llama_stack.core.request_headers import get_authenticated_user
from llama_stack.log import get_logger
@ -33,6 +35,15 @@ class AgentInfo(AgentConfig):
created_at: datetime
@dataclass
class SessionResource:
"""Concrete implementation of ProtectedResource for session access control."""
type: str
identifier: str
owner: ProtocolUser # Use the protocol type for structural compatibility
class AgentPersistence:
def __init__(self, agent_id: str, kvstore: KVStore, policy: list[AccessRule]):
self.agent_id = agent_id
@ -53,8 +64,15 @@ class AgentPersistence:
turns=[],
identifier=name, # should this be qualified in any way?
)
if not is_action_allowed(self.policy, "create", session_info, user):
raise AccessDeniedError("create", session_info, user)
# Only perform access control if we have an authenticated user
if user is not None and session_info.identifier is not None:
resource = SessionResource(
type=session_info.type,
identifier=session_info.identifier,
owner=user,
)
if not is_action_allowed(self.policy, Action.CREATE, resource, user):
raise AccessDeniedError(Action.CREATE, resource, user)
await self.kvstore.set(
key=f"session:{self.agent_id}:{session_id}",
@ -62,7 +80,7 @@ class AgentPersistence:
)
return session_id
async def get_session_info(self, session_id: str) -> AgentSessionInfo:
async def get_session_info(self, session_id: str) -> AgentSessionInfo | None:
value = await self.kvstore.get(
key=f"session:{self.agent_id}:{session_id}",
)
@ -83,7 +101,22 @@ class AgentPersistence:
if not hasattr(session_info, "access_attributes") and not hasattr(session_info, "owner"):
return True
return is_action_allowed(self.policy, "read", session_info, get_authenticated_user())
# Get current user - if None, skip access control (e.g., in tests)
user = get_authenticated_user()
if user is None:
return True
# Access control requires identifier and owner to be set
if session_info.identifier is None or session_info.owner is None:
return True
# At this point, both identifier and owner are guaranteed to be non-None
resource = SessionResource(
type=session_info.type,
identifier=session_info.identifier,
owner=session_info.owner,
)
return is_action_allowed(self.policy, Action.READ, resource, user)
async def get_session_if_accessible(self, session_id: str) -> AgentSessionInfo | None:
"""Get session info if the user has access to it. For internal use by sub-session methods."""

View file

@ -91,7 +91,8 @@ class OpenAIResponsesImpl:
input: str | list[OpenAIResponseInput],
previous_response: _OpenAIResponseObjectWithInputAndMessages,
):
new_input_items = previous_response.input.copy()
# Convert Sequence to list for mutation
new_input_items = list(previous_response.input)
new_input_items.extend(previous_response.output)
if isinstance(input, str):
@ -107,7 +108,7 @@ class OpenAIResponsesImpl:
tools: list[OpenAIResponseInputTool] | None,
previous_response_id: str | None,
conversation: str | None,
) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam]]:
) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam], ToolContext]:
"""Process input with optional previous response context.
Returns:
@ -208,6 +209,9 @@ class OpenAIResponsesImpl:
messages: list[OpenAIMessageParam],
) -> None:
new_input_id = f"msg_{uuid.uuid4()}"
# Type input_items_data as the full OpenAIResponseInput union to avoid list invariance issues
input_items_data: list[OpenAIResponseInput] = []
if isinstance(input, str):
# synthesize a message from the input string
input_content = OpenAIResponseInputMessageContentText(text=input)
@ -219,7 +223,6 @@ class OpenAIResponsesImpl:
input_items_data = [input_content_item]
else:
# we already have a list of messages
input_items_data = []
for input_item in input:
if isinstance(input_item, OpenAIResponseMessage):
# These may or may not already have an id, so dump to dict, check for id, and add if missing
@ -251,7 +254,7 @@ class OpenAIResponsesImpl:
tools: list[OpenAIResponseInputTool] | None = None,
include: list[str] | None = None,
max_infer_iters: int | None = 10,
guardrails: list[ResponseGuardrailSpec] | None = None,
guardrails: list[str | ResponseGuardrailSpec] | None = None,
):
stream = bool(stream)
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
@ -289,7 +292,8 @@ class OpenAIResponsesImpl:
failed_response = None
async for stream_chunk in stream_gen:
if stream_chunk.type in {"response.completed", "response.incomplete"}:
match stream_chunk.type:
case "response.completed" | "response.incomplete":
if final_response is not None:
raise ValueError(
"The response stream produced multiple terminal responses! "
@ -297,8 +301,10 @@ class OpenAIResponsesImpl:
)
final_response = stream_chunk.response
final_event_type = stream_chunk.type
elif stream_chunk.type == "response.failed":
case "response.failed":
failed_response = stream_chunk.response
case _:
pass # Other event types don't have .response
if failed_response is not None:
error_message = (
@ -326,6 +332,11 @@ class OpenAIResponsesImpl:
max_infer_iters: int | None = 10,
guardrail_ids: list[str] | None = None,
) -> AsyncIterator[OpenAIResponseObjectStream]:
# These should never be None when called from create_openai_response (which sets defaults)
# but we assert here to help mypy understand the types
assert text is not None, "text must not be None"
assert max_infer_iters is not None, "max_infer_iters must not be None"
# Input preprocessing
all_input, messages, tool_context = await self._process_input_with_previous_response(
input, tools, previous_response_id, conversation
@ -368,16 +379,19 @@ class OpenAIResponsesImpl:
final_response = None
failed_response = None
output_items = []
# Type as ConversationItem to avoid list invariance issues
output_items: list[ConversationItem] = []
async for stream_chunk in orchestrator.create_response():
if stream_chunk.type in {"response.completed", "response.incomplete"}:
match stream_chunk.type:
case "response.completed" | "response.incomplete":
final_response = stream_chunk.response
elif stream_chunk.type == "response.failed":
case "response.failed":
failed_response = stream_chunk.response
if stream_chunk.type == "response.output_item.done":
case "response.output_item.done":
item = stream_chunk.item
output_items.append(item)
case _:
pass # Other event types
# Store and sync before yielding terminal events
# This ensures the storage/syncing happens even if the consumer breaks after receiving the event
@ -410,7 +424,8 @@ class OpenAIResponsesImpl:
self, conversation_id: str, input: str | list[OpenAIResponseInput] | None, output_items: list[ConversationItem]
) -> None:
"""Sync content and response messages to the conversation."""
conversation_items = []
# Type as ConversationItem union to avoid list invariance issues
conversation_items: list[ConversationItem] = []
if isinstance(input, str):
conversation_items.append(

View file

@ -111,7 +111,7 @@ class StreamingResponseOrchestrator:
text: OpenAIResponseText,
max_infer_iters: int,
tool_executor, # Will be the tool execution logic from the main class
instructions: str,
instructions: str | None,
safety_api,
guardrail_ids: list[str] | None = None,
prompt: OpenAIResponsePrompt | None = None,
@ -128,7 +128,9 @@ class StreamingResponseOrchestrator:
self.prompt = prompt
self.sequence_number = 0
# Store MCP tool mapping that gets built during tool processing
self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = ctx.tool_context.previous_tools or {}
self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
ctx.tool_context.previous_tools if ctx.tool_context else {}
)
# Track final messages after all tool executions
self.final_messages: list[OpenAIMessageParam] = []
# mapping for annotations
@ -229,7 +231,8 @@ class StreamingResponseOrchestrator:
params = OpenAIChatCompletionRequestWithExtraBody(
model=self.ctx.model,
messages=messages,
tools=self.ctx.chat_tools,
# Pydantic models are dict-compatible but mypy treats them as distinct types
tools=self.ctx.chat_tools, # type: ignore[arg-type]
stream=True,
temperature=self.ctx.temperature,
response_format=response_format,
@ -272,7 +275,12 @@ class StreamingResponseOrchestrator:
# Handle choices with no tool calls
for choice in current_response.choices:
if not (choice.message.tool_calls and self.ctx.response_tools):
has_tool_calls = (
isinstance(choice.message, OpenAIAssistantMessageParam)
and choice.message.tool_calls
and self.ctx.response_tools
)
if not has_tool_calls:
output_messages.append(
await convert_chat_choice_to_response_message(
choice,
@ -722,7 +730,10 @@ class StreamingResponseOrchestrator:
)
# Accumulate arguments for final response (only for subsequent chunks)
if not is_new_tool_call:
if not is_new_tool_call and response_tool_call is not None:
# Both should have functions since we're inside the tool_call.function check above
assert response_tool_call.function is not None
assert tool_call.function is not None
response_tool_call.function.arguments = (
response_tool_call.function.arguments or ""
) + tool_call.function.arguments
@ -747,10 +758,13 @@ class StreamingResponseOrchestrator:
for tool_call_index in sorted(chat_response_tool_calls.keys()):
tool_call = chat_response_tool_calls[tool_call_index]
# Ensure that arguments, if sent back to the inference provider, are not None
if tool_call.function:
tool_call.function.arguments = tool_call.function.arguments or "{}"
tool_call_item_id = tool_call_item_ids[tool_call_index]
final_arguments = tool_call.function.arguments
tool_call_name = chat_response_tool_calls[tool_call_index].function.name
final_arguments: str = tool_call.function.arguments or "{}" if tool_call.function else "{}"
func = chat_response_tool_calls[tool_call_index].function
tool_call_name = func.name if func else ""
# Check if this is an MCP tool call
is_mcp_tool = tool_call_name and tool_call_name in self.mcp_tool_to_server
@ -894,12 +908,11 @@ class StreamingResponseOrchestrator:
self.sequence_number += 1
if tool_call.function.name and tool_call.function.name in self.mcp_tool_to_server:
item = OpenAIResponseOutputMessageMCPCall(
item: OpenAIResponseOutput = OpenAIResponseOutputMessageMCPCall(
arguments="",
name=tool_call.function.name,
id=matching_item_id,
server_label=self.mcp_tool_to_server[tool_call.function.name].server_label,
status="in_progress",
)
elif tool_call.function.name == "web_search":
item = OpenAIResponseOutputMessageWebSearchToolCall(
@ -1008,7 +1021,7 @@ class StreamingResponseOrchestrator:
description=tool.description,
input_schema=tool.input_schema,
)
return convert_tooldef_to_openai_tool(tool_def)
return convert_tooldef_to_openai_tool(tool_def) # type: ignore[return-value] # Returns dict but ChatCompletionToolParam expects TypedDict
# Initialize chat_tools if not already set
if self.ctx.chat_tools is None:
@ -1016,7 +1029,7 @@ class StreamingResponseOrchestrator:
for input_tool in tools:
if input_tool.type == "function":
self.ctx.chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
self.ctx.chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump())) # type: ignore[typeddict-item,arg-type] # Dict compatible with FunctionDefinition
elif input_tool.type in WebSearchToolTypes:
tool_name = "web_search"
# Need to access tool_groups_api from tool_executor
@ -1055,8 +1068,8 @@ class StreamingResponseOrchestrator:
if isinstance(mcp_tool.allowed_tools, list):
always_allowed = mcp_tool.allowed_tools
elif isinstance(mcp_tool.allowed_tools, AllowedToolsFilter):
always_allowed = mcp_tool.allowed_tools.always
never_allowed = mcp_tool.allowed_tools.never
# AllowedToolsFilter only has tool_names field (not allowed/disallowed)
always_allowed = mcp_tool.allowed_tools.tool_names
# Call list_mcp_tools
tool_defs = None
@ -1088,7 +1101,7 @@ class StreamingResponseOrchestrator:
openai_tool = convert_tooldef_to_chat_tool(t)
if self.ctx.chat_tools is None:
self.ctx.chat_tools = []
self.ctx.chat_tools.append(openai_tool)
self.ctx.chat_tools.append(openai_tool) # type: ignore[arg-type] # Returns dict but ChatCompletionToolParam expects TypedDict
# Add to MCP tool mapping
if t.name in self.mcp_tool_to_server:
@ -1120,12 +1133,16 @@ class StreamingResponseOrchestrator:
self, output_messages: list[OpenAIResponseOutput]
) -> AsyncIterator[OpenAIResponseObjectStream]:
# Handle all mcp tool lists from previous response that are still valid:
# tool_context can be None when no tools are provided in the response request
if self.ctx.tool_context:
for tool in self.ctx.tool_context.previous_tool_listings:
async for evt in self._reuse_mcp_list_tools(tool, output_messages):
yield evt
# Process all remaining tools (including MCP tools) and emit streaming events
if self.ctx.tool_context.tools_to_process:
async for stream_event in self._process_new_tools(self.ctx.tool_context.tools_to_process, output_messages):
async for stream_event in self._process_new_tools(
self.ctx.tool_context.tools_to_process, output_messages
):
yield stream_event
def _approval_required(self, tool_name: str) -> bool:
@ -1220,7 +1237,7 @@ class StreamingResponseOrchestrator:
openai_tool = convert_tooldef_to_openai_tool(tool_def)
if self.ctx.chat_tools is None:
self.ctx.chat_tools = []
self.ctx.chat_tools.append(openai_tool)
self.ctx.chat_tools.append(openai_tool) # type: ignore[arg-type] # Returns dict but ChatCompletionToolParam expects TypedDict
mcp_list_message = OpenAIResponseOutputMessageMCPListTools(
id=f"mcp_list_{uuid.uuid4()}",

View file

@ -7,6 +7,7 @@
import asyncio
import json
from collections.abc import AsyncIterator
from typing import Any
from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInputToolFileSearch,
@ -22,6 +23,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseObjectStreamResponseWebSearchCallSearching,
OpenAIResponseOutputMessageFileSearchToolCall,
OpenAIResponseOutputMessageFileSearchToolCallResults,
OpenAIResponseOutputMessageMCPCall,
OpenAIResponseOutputMessageWebSearchToolCall,
)
from llama_stack.apis.common.content_types import (
@ -67,7 +69,7 @@ class ToolExecutor:
) -> AsyncIterator[ToolExecutionResult]:
tool_call_id = tool_call.id
function = tool_call.function
tool_kwargs = json.loads(function.arguments) if function.arguments else {}
tool_kwargs = json.loads(function.arguments) if function and function.arguments else {}
if not function or not tool_call_id or not function.name:
yield ToolExecutionResult(sequence_number=sequence_number)
@ -84,7 +86,16 @@ class ToolExecutor:
error_exc, result = await self._execute_tool(function.name, tool_kwargs, ctx, mcp_tool_to_server)
# Emit completion events for tool execution
has_error = error_exc or (result and ((result.error_code and result.error_code > 0) or result.error_message))
has_error = bool(
error_exc
or (
result
and (
((error_code := getattr(result, "error_code", None)) and error_code > 0)
or getattr(result, "error_message", None)
)
)
)
async for event_result in self._emit_completion_events(
function.name, ctx, sequence_number, output_index, item_id, has_error, mcp_tool_to_server
):
@ -101,7 +112,9 @@ class ToolExecutor:
sequence_number=sequence_number,
final_output_message=output_message,
final_input_message=input_message,
citation_files=result.metadata.get("citation_files") if result and result.metadata else None,
citation_files=(
metadata.get("citation_files") if result and (metadata := getattr(result, "metadata", None)) else None
),
)
async def _execute_knowledge_search_via_vector_store(
@ -188,8 +201,9 @@ class ToolExecutor:
citation_files[file_id] = filename
# Cast to proper InterleavedContent type (list invariance)
return ToolInvocationResult(
content=content_items,
content=content_items, # type: ignore[arg-type]
metadata={
"document_ids": [r.file_id for r in search_results],
"chunks": [r.content[0].text if r.content else "" for r in search_results],
@ -209,51 +223,60 @@ class ToolExecutor:
) -> AsyncIterator[ToolExecutionResult]:
"""Emit progress events for tool execution start."""
# Emit in_progress event based on tool type (only for tools with specific streaming events)
progress_event = None
if mcp_tool_to_server and function_name in mcp_tool_to_server:
sequence_number += 1
progress_event = OpenAIResponseObjectStreamResponseMcpCallInProgress(
yield ToolExecutionResult(
stream_event=OpenAIResponseObjectStreamResponseMcpCallInProgress(
item_id=item_id,
output_index=output_index,
sequence_number=sequence_number,
),
sequence_number=sequence_number,
)
elif function_name == "web_search":
sequence_number += 1
progress_event = OpenAIResponseObjectStreamResponseWebSearchCallInProgress(
yield ToolExecutionResult(
stream_event=OpenAIResponseObjectStreamResponseWebSearchCallInProgress(
item_id=item_id,
output_index=output_index,
sequence_number=sequence_number,
),
sequence_number=sequence_number,
)
elif function_name == "knowledge_search":
sequence_number += 1
progress_event = OpenAIResponseObjectStreamResponseFileSearchCallInProgress(
yield ToolExecutionResult(
stream_event=OpenAIResponseObjectStreamResponseFileSearchCallInProgress(
item_id=item_id,
output_index=output_index,
sequence_number=sequence_number,
),
sequence_number=sequence_number,
)
if progress_event:
yield ToolExecutionResult(stream_event=progress_event, sequence_number=sequence_number)
# For web search, emit searching event
if function_name == "web_search":
sequence_number += 1
searching_event = OpenAIResponseObjectStreamResponseWebSearchCallSearching(
yield ToolExecutionResult(
stream_event=OpenAIResponseObjectStreamResponseWebSearchCallSearching(
item_id=item_id,
output_index=output_index,
sequence_number=sequence_number,
),
sequence_number=sequence_number,
)
yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number)
# For file search, emit searching event
if function_name == "knowledge_search":
sequence_number += 1
searching_event = OpenAIResponseObjectStreamResponseFileSearchCallSearching(
yield ToolExecutionResult(
stream_event=OpenAIResponseObjectStreamResponseFileSearchCallSearching(
item_id=item_id,
output_index=output_index,
sequence_number=sequence_number,
),
sequence_number=sequence_number,
)
yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number)
async def _execute_tool(
self,
@ -261,7 +284,7 @@ class ToolExecutor:
tool_kwargs: dict,
ctx: ChatCompletionContext,
mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
) -> tuple[Exception | None, any]:
) -> tuple[Exception | None, Any]:
"""Execute the tool and return error exception and result."""
error_exc = None
result = None
@ -284,10 +307,14 @@ class ToolExecutor:
kwargs=tool_kwargs,
)
elif function_name == "knowledge_search":
response_file_search_tool = next(
response_file_search_tool = (
next(
(t for t in ctx.response_tools if isinstance(t, OpenAIResponseInputToolFileSearch)),
None,
)
if ctx.response_tools
else None
)
if response_file_search_tool:
# Use vector_stores.search API instead of knowledge_search tool
# to support filters and ranking_options
@ -322,35 +349,34 @@ class ToolExecutor:
mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
) -> AsyncIterator[ToolExecutionResult]:
"""Emit completion or failure events for tool execution."""
completion_event = None
if mcp_tool_to_server and function_name in mcp_tool_to_server:
sequence_number += 1
if has_error:
completion_event = OpenAIResponseObjectStreamResponseMcpCallFailed(
mcp_failed_event = OpenAIResponseObjectStreamResponseMcpCallFailed(
sequence_number=sequence_number,
)
yield ToolExecutionResult(stream_event=mcp_failed_event, sequence_number=sequence_number)
else:
completion_event = OpenAIResponseObjectStreamResponseMcpCallCompleted(
mcp_completed_event = OpenAIResponseObjectStreamResponseMcpCallCompleted(
sequence_number=sequence_number,
)
yield ToolExecutionResult(stream_event=mcp_completed_event, sequence_number=sequence_number)
elif function_name == "web_search":
sequence_number += 1
completion_event = OpenAIResponseObjectStreamResponseWebSearchCallCompleted(
web_completion_event = OpenAIResponseObjectStreamResponseWebSearchCallCompleted(
item_id=item_id,
output_index=output_index,
sequence_number=sequence_number,
)
yield ToolExecutionResult(stream_event=web_completion_event, sequence_number=sequence_number)
elif function_name == "knowledge_search":
sequence_number += 1
completion_event = OpenAIResponseObjectStreamResponseFileSearchCallCompleted(
file_completion_event = OpenAIResponseObjectStreamResponseFileSearchCallCompleted(
item_id=item_id,
output_index=output_index,
sequence_number=sequence_number,
)
if completion_event:
yield ToolExecutionResult(stream_event=completion_event, sequence_number=sequence_number)
yield ToolExecutionResult(stream_event=file_completion_event, sequence_number=sequence_number)
async def _build_result_messages(
self,
@ -360,21 +386,18 @@ class ToolExecutor:
tool_kwargs: dict,
ctx: ChatCompletionContext,
error_exc: Exception | None,
result: any,
result: Any,
has_error: bool,
mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
) -> tuple[any, any]:
) -> tuple[Any, Any]:
"""Build output and input messages from tool execution results."""
from llama_stack.providers.utils.inference.prompt_adapter import (
interleaved_content_as_str,
)
# Build output message
message: Any
if mcp_tool_to_server and function.name in mcp_tool_to_server:
from llama_stack.apis.agents.openai_responses import (
OpenAIResponseOutputMessageMCPCall,
)
message = OpenAIResponseOutputMessageMCPCall(
id=item_id,
arguments=function.arguments,
@ -383,10 +406,14 @@ class ToolExecutor:
)
if error_exc:
message.error = str(error_exc)
elif (result and result.error_code and result.error_code > 0) or (result and result.error_message):
message.error = f"Error (code {result.error_code}): {result.error_message}"
elif result and result.content:
message.output = interleaved_content_as_str(result.content)
elif (result and (error_code := getattr(result, "error_code", None)) and error_code > 0) or (
result and getattr(result, "error_message", None)
):
ec = getattr(result, "error_code", "unknown")
em = getattr(result, "error_message", "")
message.error = f"Error (code {ec}): {em}"
elif result and (content := getattr(result, "content", None)):
message.output = interleaved_content_as_str(content)
else:
if function.name == "web_search":
message = OpenAIResponseOutputMessageWebSearchToolCall(
@ -401,17 +428,17 @@ class ToolExecutor:
queries=[tool_kwargs.get("query", "")],
status="completed",
)
if result and "document_ids" in result.metadata:
if result and (metadata := getattr(result, "metadata", None)) and "document_ids" in metadata:
message.results = []
for i, doc_id in enumerate(result.metadata["document_ids"]):
text = result.metadata["chunks"][i] if "chunks" in result.metadata else None
score = result.metadata["scores"][i] if "scores" in result.metadata else None
for i, doc_id in enumerate(metadata["document_ids"]):
text = metadata["chunks"][i] if "chunks" in metadata else None
score = metadata["scores"][i] if "scores" in metadata else None
message.results.append(
OpenAIResponseOutputMessageFileSearchToolCallResults(
file_id=doc_id,
filename=doc_id,
text=text,
score=score,
text=text if text is not None else "",
score=score if score is not None else 0.0,
attributes={},
)
)
@ -421,27 +448,32 @@ class ToolExecutor:
raise ValueError(f"Unknown tool {function.name} called")
# Build input message
input_message = None
if result and result.content:
if isinstance(result.content, str):
content = result.content
elif isinstance(result.content, list):
content = []
for item in result.content:
input_message: OpenAIToolMessageParam | None = None
if result and (result_content := getattr(result, "content", None)):
# The mypy workarounds here remain unsatisfying: the values are effectively typed as Any
if isinstance(result_content, str):
msg_content: str | list[Any] = result_content
elif isinstance(result_content, list):
content_list: list[Any] = []
for item in result_content:
part: Any
if isinstance(item, TextContentItem):
part = OpenAIChatCompletionContentPartTextParam(text=item.text)
elif isinstance(item, ImageContentItem):
if item.image.data:
url = f"data:image;base64,{item.image.data}"
url_value = f"data:image;base64,{item.image.data}"
else:
url = item.image.url
part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url))
url_value = str(item.image.url) if item.image.url else ""
part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url_value))
else:
raise ValueError(f"Unknown result content type: {type(item)}")
content.append(part)
content_list.append(part)
msg_content = content_list
else:
raise ValueError(f"Unknown result content type: {type(result.content)}")
input_message = OpenAIToolMessageParam(content=content, tool_call_id=tool_call_id)
raise ValueError(f"Unknown result content type: {type(result_content)}")
# OpenAIToolMessageParam accepts str | list[TextParam] but we may have images
# This is runtime-safe as the API accepts it, but mypy complains
input_message = OpenAIToolMessageParam(content=msg_content, tool_call_id=tool_call_id) # type: ignore[arg-type]
else:
text = str(error_exc) if error_exc else "Tool execution failed"
input_message = OpenAIToolMessageParam(content=text, tool_call_id=tool_call_id)
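# Illustrative sketch (hypothetical ToolResult/describe_failure names) of the
# getattr/walrus pattern used above to probe duck-typed tool results without
# assuming a concrete class.
from dataclasses import dataclass

@dataclass
class ToolResult:
    error_code: int | None = None
    error_message: str | None = None

def describe_failure(result: object | None) -> str | None:
    if result is None:
        return None
    if (code := getattr(result, "error_code", None)) and code > 0:
        return f"Error (code {code}): {getattr(result, 'error_message', '')}"
    if message := getattr(result, "error_message", None):
        return f"Error: {message}"
    return None

print(describe_failure(ToolResult(error_code=2, error_message="timeout")))  # Error (code 2): timeout
print(describe_failure(ToolResult()))                                       # None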

View file

@ -5,6 +5,7 @@
# the root directory of this source tree.
from dataclasses import dataclass
from typing import cast
from openai.types.chat import ChatCompletionToolParam
from pydantic import BaseModel
@ -100,17 +101,19 @@ class ToolContext(BaseModel):
if isinstance(tool, OpenAIResponseToolMCP):
previous_tools_by_label[tool.server_label] = tool
# collect tool definitions which are the same in current and previous requests:
tools_to_process = []
tools_to_process: list[OpenAIResponseInputTool] = []
matched: dict[str, OpenAIResponseInputToolMCP] = {}
for tool in self.current_tools:
# Mypy confuses OpenAIResponseInputTool (the input union) with OpenAIResponseTool (the output union);
# the two differ only in their MCP member (InputToolMCP vs ToolMCP), so the code is correct at runtime.
for tool in cast(list[OpenAIResponseInputTool], self.current_tools): # type: ignore[assignment]
if isinstance(tool, OpenAIResponseInputToolMCP) and tool.server_label in previous_tools_by_label:
previous_tool = previous_tools_by_label[tool.server_label]
if previous_tool.allowed_tools == tool.allowed_tools:
matched[tool.server_label] = tool
else:
tools_to_process.append(tool)
tools_to_process.append(tool) # type: ignore[arg-type]
else:
tools_to_process.append(tool)
tools_to_process.append(tool) # type: ignore[arg-type]
# tools that are not the same or were not previously defined need to be processed:
self.tools_to_process = tools_to_process
# for all matched definitions, get the mcp_list_tools objects from the previous output:
@ -119,9 +122,11 @@ class ToolContext(BaseModel):
]
# reconstruct the tool to server mappings that can be reused:
for listing in self.previous_tool_listings:
# listing is OpenAIResponseOutputMessageMCPListTools which has tools: list[MCPListToolsTool]
definition = matched[listing.server_label]
for tool in listing.tools:
self.previous_tools[tool.name] = definition
for mcp_tool in listing.tools:
# mcp_tool is MCPListToolsTool which has a name: str field
self.previous_tools[mcp_tool.name] = definition
def available_tools(self) -> list[OpenAIResponseTool]:
if not self.current_tools:
@ -139,6 +144,8 @@ class ToolContext(BaseModel):
server_label=tool.server_label,
allowed_tools=tool.allowed_tools,
)
# Exhaustive check - all tool types should be handled above
raise AssertionError(f"Unexpected tool type: {type(tool)}")
return [convert_tool(tool) for tool in self.current_tools]
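# Illustrative sketch (hypothetical Circle/Square names) of the exhaustiveness
# pattern used in convert_tool above: an isinstance dispatch over a union that
# ends in a raise, so mypy can prove the function never falls off the end.
import math

class Circle:
    def __init__(self, radius: float) -> None:
        self.radius = radius

class Square:
    def __init__(self, side: float) -> None:
        self.side = side

def area(shape: Circle | Square) -> float:
    if isinstance(shape, Circle):
        return math.pi * shape.radius ** 2
    if isinstance(shape, Square):
        return shape.side ** 2
    # Unreachable for the declared union, but guards against an implicit None
    # return if the union grows later.
    raise AssertionError(f"Unexpected shape type: {type(shape)}")

print(area(Circle(1.0)))
print(area(Square(2.0)))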

View file

@ -7,6 +7,7 @@
import asyncio
import re
import uuid
from collections.abc import Sequence
from llama_stack.apis.agents.agents import ResponseGuardrailSpec
from llama_stack.apis.agents.openai_responses import (
@ -71,14 +72,14 @@ async def convert_chat_choice_to_response_message(
return OpenAIResponseMessage(
id=message_id or f"msg_{uuid.uuid4()}",
content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=annotations)],
content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=list(annotations))],
status="completed",
role="assistant",
)
async def convert_response_content_to_chat_content(
content: (str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]),
content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent],
) -> str | list[OpenAIChatCompletionContentPartParam]:
"""
Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.
@ -88,7 +89,8 @@ async def convert_response_content_to_chat_content(
if isinstance(content, str):
return content
converted_parts = []
# Type with union to avoid list invariance issues
converted_parts: list[OpenAIChatCompletionContentPartParam] = []
for content_part in content:
if isinstance(content_part, OpenAIResponseInputMessageContentText):
converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
@ -158,9 +160,11 @@ async def convert_response_input_to_chat_messages(
),
)
messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call]))
# Output can be None, use empty string as fallback
output_content = input_item.output if input_item.output is not None else ""
messages.append(
OpenAIToolMessageParam(
content=input_item.output,
content=output_content,
tool_call_id=input_item.id,
)
)
@ -172,7 +176,8 @@ async def convert_response_input_to_chat_messages(
):
# these are handled by the responses impl itself and are not passed through to chat completions
pass
else:
elif isinstance(input_item, OpenAIResponseMessage):
# Narrow type to OpenAIResponseMessage which has content and role attributes
content = await convert_response_content_to_chat_content(input_item.content)
message_type = await get_message_type_by_role(input_item.role)
if message_type is None:
@ -191,7 +196,8 @@ async def convert_response_input_to_chat_messages(
last_user_content = getattr(last_user_msg, "content", None)
if last_user_content == content:
continue # Skip duplicate user message
messages.append(message_type(content=content))
# Dynamic message type call - different message types have different content expectations
messages.append(message_type(content=content)) # type: ignore[call-arg,arg-type]
if len(tool_call_results):
# Check if unpaired function_call_outputs reference function_calls from previous messages
if previous_messages:
@ -237,8 +243,11 @@ async def convert_response_text_to_chat_response_format(
if text.format["type"] == "json_object":
return OpenAIResponseFormatJSONObject()
if text.format["type"] == "json_schema":
# Assert name exists for json_schema format
assert text.format.get("name"), "json_schema format requires a name"
schema_name: str = text.format["name"] # type: ignore[assignment]
return OpenAIResponseFormatJSONSchema(
json_schema=OpenAIJSONSchema(name=text.format["name"], schema=text.format["schema"])
json_schema=OpenAIJSONSchema(name=schema_name, schema=text.format["schema"])
)
raise ValueError(f"Unsupported text format: {text.format}")
@ -251,7 +260,7 @@ async def get_message_type_by_role(role: str) -> type[OpenAIMessageParam] | None
"assistant": OpenAIAssistantMessageParam,
"developer": OpenAIDeveloperMessageParam,
}
return role_to_type.get(role)
return role_to_type.get(role) # type: ignore[return-value] # Pydantic models use ModelMetaclass
def _extract_citations_from_text(
@ -320,7 +329,8 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[
# Look up shields to get their provider_resource_id (actual model ID)
model_ids = []
shields_list = await safety_api.routing_table.list_shields()
# TODO: list_shields not in Safety interface but available at runtime via API routing
shields_list = await safety_api.routing_table.list_shields() # type: ignore[attr-defined]
for guardrail_id in guardrail_ids:
matching_shields = [shield for shield in shields_list.data if shield.identifier == guardrail_id]
@ -337,7 +347,9 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[
for result in response.results:
if result.flagged:
message = result.user_message or "Content blocked by safety guardrails"
flagged_categories = [cat for cat, flagged in result.categories.items() if flagged]
flagged_categories = (
[cat for cat, flagged in result.categories.items() if flagged] if result.categories else []
)
violation_type = result.metadata.get("violation_type", []) if result.metadata else []
if flagged_categories:
@ -347,6 +359,9 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[
return message
# No violations found
return None
def extract_guardrail_ids(guardrails: list | None) -> list[str]:
"""Extract guardrail IDs from guardrails parameter, handling both string IDs and ResponseGuardrailSpec objects."""

View file

@ -6,7 +6,7 @@
import asyncio
from llama_stack.apis.inference import Message
from llama_stack.apis.inference import OpenAIMessageParam
from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel
from llama_stack.core.telemetry import tracing
from llama_stack.log import get_logger
@ -31,7 +31,7 @@ class ShieldRunnerMixin:
self.input_shields = input_shields
self.output_shields = output_shields
async def run_multiple_shields(self, messages: list[Message], identifiers: list[str]) -> None:
async def run_multiple_shields(self, messages: list[OpenAIMessageParam], identifiers: list[str]) -> None:
async def run_shield_with_span(identifier: str):
async with tracing.span(f"run_shield_{identifier}"):
return await self.safety_api.run_shield(

View file

@ -33,4 +33,5 @@ class AnthropicInferenceAdapter(OpenAIMixin):
return "https://api.anthropic.com/v1"
async def list_provider_model_ids(self) -> Iterable[str]:
return [m.id async for m in AsyncAnthropic(api_key=self.get_api_key()).models.list()]
api_key = self._get_api_key_from_config_or_provider_data()
return [m.id async for m in AsyncAnthropic(api_key=api_key).models.list()]
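# Illustrative sketch of one way a key lookup like
# _get_api_key_from_config_or_provider_data might be ordered (per-request
# provider data first, then config); the helper's actual precedence may differ,
# and the ProviderData/resolve_api_key names here are hypothetical.
from dataclasses import dataclass

@dataclass
class ProviderData:
    anthropic_api_key: str | None = None

def resolve_api_key(config_key: str | None, provider_data: ProviderData | None) -> str:
    if provider_data and provider_data.anthropic_api_key:
        return provider_data.anthropic_api_key
    if config_key:
        return config_key
    raise ValueError("No API key found in config or request provider data")

print(resolve_api_key("sk-from-config", None))
print(resolve_api_key(None, ProviderData(anthropic_api_key="sk-from-request")))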

View file

@ -33,10 +33,11 @@ class DatabricksInferenceAdapter(OpenAIMixin):
async def list_provider_model_ids(self) -> Iterable[str]:
# Filter out None values from endpoint names
api_token = self._get_api_key_from_config_or_provider_data()
return [
endpoint.name # type: ignore[misc]
for endpoint in WorkspaceClient(
host=self.config.url, token=self.get_api_key()
host=self.config.url, token=api_token
).serving_endpoints.list() # TODO: this is not async
]

View file

@ -128,7 +128,9 @@ class LiteLLMOpenAIMixin(
return schema
async def _get_params(self, request: ChatCompletionRequest) -> dict:
input_dict = {}
from typing import Any
input_dict: dict[str, Any] = {}
input_dict["messages"] = [
await convert_message_to_openai_dict_new(m, download_images=self.download_images) for m in request.messages
@ -139,30 +141,27 @@ class LiteLLMOpenAIMixin(
f"Unsupported response format: {type(fmt)}. Only JsonSchemaResponseFormat is supported."
)
fmt = fmt.json_schema
name = fmt["title"]
del fmt["title"]
fmt["additionalProperties"] = False
# Convert to dict for manipulation
fmt_dict = dict(fmt.json_schema)
name = fmt_dict["title"]
del fmt_dict["title"]
fmt_dict["additionalProperties"] = False
# Apply additionalProperties: False recursively to all objects
fmt = self._add_additional_properties_recursive(fmt)
fmt_dict = self._add_additional_properties_recursive(fmt_dict)
input_dict["response_format"] = {
"type": "json_schema",
"json_schema": {
"name": name,
"schema": fmt,
"schema": fmt_dict,
"strict": self.json_schema_strict,
},
}
if request.tools:
input_dict["tools"] = [convert_tooldef_to_openai_tool(tool) for tool in request.tools]
if request.tool_config.tool_choice:
input_dict["tool_choice"] = (
request.tool_config.tool_choice.value
if isinstance(request.tool_config.tool_choice, ToolChoice)
else request.tool_config.tool_choice
)
if request.tool_config and (tool_choice := request.tool_config.tool_choice):
input_dict["tool_choice"] = tool_choice.value if isinstance(tool_choice, ToolChoice) else tool_choice
return {
"model": request.model,
@ -176,9 +175,9 @@ class LiteLLMOpenAIMixin(
def get_api_key(self) -> str:
provider_data = self.get_request_provider_data()
key_field = self.provider_data_api_key_field
if provider_data and getattr(provider_data, key_field, None):
api_key = getattr(provider_data, key_field)
else:
if provider_data and key_field and (api_key := getattr(provider_data, key_field, None)):
return str(api_key) # type: ignore[no-any-return] # getattr returns Any, can't narrow without runtime type inspection
api_key = self.api_key_from_config
if not api_key:
raise ValueError(
@ -192,7 +191,13 @@ class LiteLLMOpenAIMixin(
self,
params: OpenAIEmbeddingsRequestWithExtraBody,
) -> OpenAIEmbeddingsResponse:
if not self.model_store:
raise ValueError("Model store is not initialized")
model_obj = await self.model_store.get_model(params.model)
if model_obj.provider_resource_id is None:
raise ValueError(f"Model {params.model} has no provider_resource_id")
provider_resource_id = model_obj.provider_resource_id
# Convert input to list if it's a string
input_list = [params.input] if isinstance(params.input, str) else params.input
@ -200,7 +205,7 @@ class LiteLLMOpenAIMixin(
# Call litellm embedding function
# litellm.drop_params = True
response = litellm.embedding(
model=self.get_litellm_model_name(model_obj.provider_resource_id),
model=self.get_litellm_model_name(provider_resource_id),
input=input_list,
api_key=self.get_api_key(),
api_base=self.api_base,
@ -217,7 +222,7 @@ class LiteLLMOpenAIMixin(
return OpenAIEmbeddingsResponse(
data=data,
model=model_obj.provider_resource_id,
model=provider_resource_id,
usage=usage,
)
@ -225,10 +230,16 @@ class LiteLLMOpenAIMixin(
self,
params: OpenAICompletionRequestWithExtraBody,
) -> OpenAICompletion:
if not self.model_store:
raise ValueError("Model store is not initialized")
model_obj = await self.model_store.get_model(params.model)
if model_obj.provider_resource_id is None:
raise ValueError(f"Model {params.model} has no provider_resource_id")
provider_resource_id = model_obj.provider_resource_id
request_params = await prepare_openai_completion_params(
model=self.get_litellm_model_name(model_obj.provider_resource_id),
model=self.get_litellm_model_name(provider_resource_id),
prompt=params.prompt,
best_of=params.best_of,
echo=params.echo,
@ -249,7 +260,8 @@ class LiteLLMOpenAIMixin(
api_key=self.get_api_key(),
api_base=self.api_base,
)
return await litellm.atext_completion(**request_params)
# LiteLLM returns compatible type but mypy can't verify external library
return await litellm.atext_completion(**request_params) # type: ignore[no-any-return] # external lib lacks type stubs
async def openai_chat_completion(
self,
@ -265,10 +277,16 @@ class LiteLLMOpenAIMixin(
elif "include_usage" not in stream_options:
stream_options = {**stream_options, "include_usage": True}
if not self.model_store:
raise ValueError("Model store is not initialized")
model_obj = await self.model_store.get_model(params.model)
if model_obj.provider_resource_id is None:
raise ValueError(f"Model {params.model} has no provider_resource_id")
provider_resource_id = model_obj.provider_resource_id
request_params = await prepare_openai_completion_params(
model=self.get_litellm_model_name(model_obj.provider_resource_id),
model=self.get_litellm_model_name(provider_resource_id),
messages=params.messages,
frequency_penalty=params.frequency_penalty,
function_call=params.function_call,
@ -294,7 +312,8 @@ class LiteLLMOpenAIMixin(
api_key=self.get_api_key(),
api_base=self.api_base,
)
return await litellm.acompletion(**request_params)
# LiteLLM returns compatible type but mypy can't verify external library
return await litellm.acompletion(**request_params) # type: ignore[no-any-return] # external lib lacks type stubs
async def check_model_availability(self, model: str) -> bool:
"""

View file

@ -161,7 +161,9 @@ def get_sampling_strategy_options(params: SamplingParams) -> dict:
if isinstance(params.strategy, GreedySamplingStrategy):
options["temperature"] = 0.0
elif isinstance(params.strategy, TopPSamplingStrategy):
if params.strategy.temperature is not None:
options["temperature"] = params.strategy.temperature
if params.strategy.top_p is not None:
options["top_p"] = params.strategy.top_p
elif isinstance(params.strategy, TopKSamplingStrategy):
options["top_k"] = params.strategy.top_k
@ -192,12 +194,12 @@ def get_sampling_options(params: SamplingParams | None) -> dict:
def text_from_choice(choice) -> str:
if hasattr(choice, "delta") and choice.delta:
return choice.delta.content
return choice.delta.content # type: ignore[no-any-return] # external OpenAI types lack precise annotations
if hasattr(choice, "message"):
return choice.message.content
return choice.message.content # type: ignore[no-any-return] # external OpenAI types lack precise annotations
return choice.text
return choice.text # type: ignore[no-any-return] # external OpenAI types lack precise annotations
def get_stop_reason(finish_reason: str) -> StopReason:
@ -216,7 +218,7 @@ def convert_openai_completion_logprobs(
) -> list[TokenLogProbs] | None:
if not logprobs:
return None
if hasattr(logprobs, "top_logprobs"):
if hasattr(logprobs, "top_logprobs") and logprobs.top_logprobs:
return [TokenLogProbs(logprobs_by_token=x) for x in logprobs.top_logprobs]
# Together supports logprobs with top_k=1 only. This means for each token position,
@ -236,7 +238,7 @@ def convert_openai_completion_logprobs_stream(text: str, logprobs: float | OpenA
if isinstance(logprobs, float):
# Adapt response from Together CompletionChoicesChunk
return [TokenLogProbs(logprobs_by_token={text: logprobs})]
if hasattr(logprobs, "top_logprobs"):
if hasattr(logprobs, "top_logprobs") and logprobs.top_logprobs:
return [TokenLogProbs(logprobs_by_token=x) for x in logprobs.top_logprobs]
return None
@ -245,23 +247,24 @@ def process_completion_response(
response: OpenAICompatCompletionResponse,
) -> CompletionResponse:
choice = response.choices[0]
text = choice.text or ""
# drop suffix <eot_id> if present and return stop reason as end of turn
if choice.text.endswith("<|eot_id|>"):
if text.endswith("<|eot_id|>"):
return CompletionResponse(
stop_reason=StopReason.end_of_turn,
content=choice.text[: -len("<|eot_id|>")],
content=text[: -len("<|eot_id|>")],
logprobs=convert_openai_completion_logprobs(choice.logprobs),
)
# drop suffix <eom_id> if present and return stop reason as end of message
if choice.text.endswith("<|eom_id|>"):
if text.endswith("<|eom_id|>"):
return CompletionResponse(
stop_reason=StopReason.end_of_message,
content=choice.text[: -len("<|eom_id|>")],
content=text[: -len("<|eom_id|>")],
logprobs=convert_openai_completion_logprobs(choice.logprobs),
)
return CompletionResponse(
stop_reason=get_stop_reason(choice.finish_reason),
content=choice.text,
stop_reason=get_stop_reason(choice.finish_reason or "stop"),
content=text,
logprobs=convert_openai_completion_logprobs(choice.logprobs),
)
@ -272,10 +275,10 @@ def process_chat_completion_response(
) -> ChatCompletionResponse:
choice = response.choices[0]
if choice.finish_reason == "tool_calls":
if not choice.message or not choice.message.tool_calls:
if not hasattr(choice, "message") or not choice.message or not choice.message.tool_calls: # type: ignore[attr-defined] # OpenAICompatCompletionChoice is runtime duck-typed
raise ValueError("Tool calls are not present in the response")
tool_calls = [convert_tool_call(tool_call) for tool_call in choice.message.tool_calls]
tool_calls = [convert_tool_call(tool_call) for tool_call in choice.message.tool_calls] # type: ignore[attr-defined] # OpenAICompatCompletionChoice is runtime duck-typed
if any(isinstance(tool_call, UnparseableToolCall) for tool_call in tool_calls):
# If we couldn't parse a tool call, jsonify the tool calls and return them
return ChatCompletionResponse(
@ -287,9 +290,11 @@ def process_chat_completion_response(
)
else:
# Otherwise, return tool calls as normal
# Filter to only valid ToolCall objects
valid_tool_calls = [tc for tc in tool_calls if isinstance(tc, ToolCall)]
return ChatCompletionResponse(
completion_message=CompletionMessage(
tool_calls=tool_calls,
tool_calls=valid_tool_calls,
stop_reason=StopReason.end_of_turn,
# Content is not optional
content="",
@ -299,7 +304,7 @@ def process_chat_completion_response(
# TODO: This does not work well with tool calls for vLLM remote provider
# Ref: https://github.com/meta-llama/llama-stack/issues/1058
raw_message = decode_assistant_message(text_from_choice(choice), get_stop_reason(choice.finish_reason))
raw_message = decode_assistant_message(text_from_choice(choice), get_stop_reason(choice.finish_reason or "stop"))
# NOTE: If we do not set tools in chat-completion request, we should not
# expect the ToolCall in the response. Instead, we should return the raw
@ -324,8 +329,8 @@ def process_chat_completion_response(
return ChatCompletionResponse(
completion_message=CompletionMessage(
content=raw_message.content,
stop_reason=raw_message.stop_reason,
content=raw_message.content, # type: ignore[arg-type] # decode_assistant_message returns Union[str, InterleavedContent]
stop_reason=raw_message.stop_reason or StopReason.end_of_turn,
tool_calls=raw_message.tool_calls,
),
logprobs=None,
@ -448,7 +453,7 @@ async def process_chat_completion_stream_response(
)
# parse tool calls and report errors
message = decode_assistant_message(buffer, stop_reason)
message = decode_assistant_message(buffer, stop_reason or StopReason.end_of_turn)
parsed_tool_calls = len(message.tool_calls) > 0
if ipython and not parsed_tool_calls:
@ -463,7 +468,7 @@ async def process_chat_completion_stream_response(
)
)
request_tools = {t.tool_name: t for t in request.tools}
request_tools = {t.tool_name: t for t in (request.tools or [])}
for tool_call in message.tool_calls:
if tool_call.tool_name in request_tools:
yield ChatCompletionResponseStreamChunk(
@ -525,7 +530,7 @@ async def convert_message_to_openai_dict(message: Message, download: bool = Fals
}
if hasattr(message, "tool_calls") and message.tool_calls:
result["tool_calls"] = []
tool_calls_list = []
for tc in message.tool_calls:
# The tool.tool_name can be a str or a BuiltinTool enum. If
# it's the latter, convert to a string.
@ -533,7 +538,7 @@ async def convert_message_to_openai_dict(message: Message, download: bool = Fals
if isinstance(tool_name, BuiltinTool):
tool_name = tool_name.value
result["tool_calls"].append(
tool_calls_list.append(
{
"id": tc.call_id,
"type": "function",
@ -543,6 +548,7 @@ async def convert_message_to_openai_dict(message: Message, download: bool = Fals
},
}
)
result["tool_calls"] = tool_calls_list # type: ignore[assignment] # dict allows Any value, stricter type expected
return result
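# Illustrative sketch (hypothetical build_payload name) of the build-then-assign
# pattern used for tool_calls above: collect items in a locally typed list, then
# assign it into the outgoing dict once.
from typing import Any

def build_payload(call_ids: list[str]) -> dict[str, Any]:
    payload: dict[str, Any] = {"role": "assistant"}
    tool_calls: list[dict[str, Any]] = []
    for call_id in call_ids:
        tool_calls.append({"id": call_id, "type": "function"})
    if tool_calls:
        payload["tool_calls"] = tool_calls
    return payload

print(build_payload(["call_1", "call_2"]))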
@ -608,7 +614,7 @@ async def convert_message_to_openai_dict_new(
),
)
elif isinstance(content_, list):
return [await impl(item) for item in content_]
return [await impl(item) for item in content_] # type: ignore[misc] # recursive list comprehension confuses mypy's type narrowing
else:
raise ValueError(f"Unsupported content type: {type(content_)}")
@ -620,7 +626,7 @@ async def convert_message_to_openai_dict_new(
else:
return [ret]
out: OpenAIChatCompletionMessage = None
out: OpenAIChatCompletionMessage
if isinstance(message, UserMessage):
out = OpenAIChatCompletionUserMessage(
role="user",
@ -636,7 +642,7 @@ async def convert_message_to_openai_dict_new(
),
type="function",
)
for tool in message.tool_calls
for tool in (message.tool_calls or [])
]
params = {}
if tool_calls:
@ -644,18 +650,18 @@ async def convert_message_to_openai_dict_new(
out = OpenAIChatCompletionAssistantMessage(
role="assistant",
content=await _convert_message_content(message.content),
**params,
**params, # type: ignore[typeddict-item] # tool_calls dict expansion conflicts with TypedDict optional field
)
elif isinstance(message, ToolResponseMessage):
out = OpenAIChatCompletionToolMessage(
role="tool",
tool_call_id=message.call_id,
content=await _convert_message_content(message.content),
content=await _convert_message_content(message.content), # type: ignore[typeddict-item] # content union type incompatible with TypedDict str requirement
)
elif isinstance(message, SystemMessage):
out = OpenAIChatCompletionSystemMessage(
role="system",
content=await _convert_message_content(message.content),
content=await _convert_message_content(message.content), # type: ignore[typeddict-item] # content union type incompatible with TypedDict str requirement
)
else:
raise ValueError(f"Unsupported message type: {type(message)}")
@ -758,16 +764,16 @@ def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict:
function = out["function"]
if isinstance(tool.tool_name, BuiltinTool):
function["name"] = tool.tool_name.value
function["name"] = tool.tool_name.value # type: ignore[index] # dict value inferred as Any but mypy sees Collection[str]
else:
function["name"] = tool.tool_name
function["name"] = tool.tool_name # type: ignore[index] # dict value inferred as Any but mypy sees Collection[str]
if tool.description:
function["description"] = tool.description
function["description"] = tool.description # type: ignore[index] # dict value inferred as Any but mypy sees Collection[str]
if tool.input_schema:
# Pass through the entire JSON Schema as-is
function["parameters"] = tool.input_schema
function["parameters"] = tool.input_schema # type: ignore[index] # dict value inferred as Any but mypy sees Collection[str]
# NOTE: OpenAI does not support output_schema, so we drop it here
# It's stored in LlamaStack for validation and other provider usage
@ -815,15 +821,15 @@ def _convert_openai_request_tool_config(tool_choice: str | dict[str, Any] | None
tool_config = ToolConfig()
if tool_choice:
try:
tool_choice = ToolChoice(tool_choice)
tool_choice = ToolChoice(tool_choice) # type: ignore[assignment] # reassigning to enum narrows union but mypy can't track after exception
except ValueError:
pass
tool_config.tool_choice = tool_choice
tool_config.tool_choice = tool_choice # type: ignore[assignment] # ToolConfig.tool_choice accepts Union[ToolChoice, dict] but mypy tracks narrower type
return tool_config
def _convert_openai_request_tools(tools: list[dict[str, Any]] | None = None) -> list[ToolDefinition]:
lls_tools = []
lls_tools: list[ToolDefinition] = []
if not tools:
return lls_tools
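For reference, a minimal sketch of how the tool_choice mapping above is expected to behave (not part of the diff; assumes ToolChoice exposes an "auto" member, consistent with the ToolChoice.none check used later in this file):

    cfg = _convert_openai_request_tool_config("auto")
    assert cfg.tool_choice == ToolChoice.auto  # recognized strings are coerced to the ToolChoice enum

    cfg = _convert_openai_request_tool_config(None)
    assert cfg.tool_choice == ToolConfig().tool_choice  # no tool_choice leaves the default ToolConfig untouched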
@ -843,16 +849,16 @@ def _convert_openai_request_tools(tools: list[dict[str, Any]] | None = None) ->
def _convert_openai_request_response_format(
response_format: OpenAIResponseFormatParam = None,
response_format: OpenAIResponseFormatParam | None = None,
):
if not response_format:
return None
# response_format can be a dict or a pydantic model
response_format = dict(response_format)
if response_format.get("type", "") == "json_schema":
response_format_dict = dict(response_format) # type: ignore[arg-type] # OpenAIResponseFormatParam union needs dict conversion
if response_format_dict.get("type", "") == "json_schema":
return JsonSchemaResponseFormat(
type="json_schema",
json_schema=response_format.get("json_schema", {}).get("schema", ""),
type="json_schema", # type: ignore[arg-type] # Literal["json_schema"] incompatible with expected type
json_schema=response_format_dict.get("json_schema", {}).get("schema", ""),
)
return None
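A hedged sketch of the response_format conversion above; the schema payload is made up for illustration, and attribute access on JsonSchemaResponseFormat is assumed to be pydantic-style:

    rf = {"type": "json_schema", "json_schema": {"name": "city", "schema": {"type": "object", "properties": {"name": {"type": "string"}}}}}
    converted = _convert_openai_request_response_format(rf)
    assert converted is not None and converted.json_schema == {"type": "object", "properties": {"name": {"type": "string"}}}

    assert _convert_openai_request_response_format({"type": "text"}) is None  # only json_schema formats are mapped
    assert _convert_openai_request_response_format(None) is None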
@ -938,16 +944,15 @@ def _convert_openai_sampling_params(
# Map an explicit temperature of 0 to greedy sampling
if temperature == 0:
strategy = GreedySamplingStrategy()
sampling_params.strategy = GreedySamplingStrategy()
else:
# OpenAI defaults to 1.0 for temperature and top_p if unset
if temperature is None:
temperature = 1.0
if top_p is None:
top_p = 1.0
strategy = TopPSamplingStrategy(temperature=temperature, top_p=top_p)
sampling_params.strategy = TopPSamplingStrategy(temperature=temperature, top_p=top_p) # type: ignore[assignment] # SamplingParams.strategy union accepts this type
sampling_params.strategy = strategy
return sampling_params
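For the sampling-parameter mapping above, a small illustrative check (assumes the max_tokens/temperature/top_p keyword signature implied by the call site further down in this file):

    params = _convert_openai_sampling_params(max_tokens=64, temperature=0, top_p=None)
    assert isinstance(params.strategy, GreedySamplingStrategy)  # temperature == 0 maps to greedy decoding

    params = _convert_openai_sampling_params(max_tokens=64, temperature=None, top_p=None)
    assert isinstance(params.strategy, TopPSamplingStrategy)  # unset values fall back to OpenAI's defaults of 1.0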
@ -957,23 +962,24 @@ def openai_messages_to_messages(
"""
Convert a list of OpenAIChatCompletionMessage into a list of Message.
"""
converted_messages = []
converted_messages: list[Message] = []
for message in messages:
converted_message: Message
if message.role == "system":
converted_message = SystemMessage(content=openai_content_to_content(message.content))
converted_message = SystemMessage(content=openai_content_to_content(message.content)) # type: ignore[arg-type] # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
elif message.role == "user":
converted_message = UserMessage(content=openai_content_to_content(message.content))
converted_message = UserMessage(content=openai_content_to_content(message.content)) # type: ignore[arg-type] # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
elif message.role == "assistant":
converted_message = CompletionMessage(
content=openai_content_to_content(message.content),
tool_calls=_convert_openai_tool_calls(message.tool_calls),
content=openai_content_to_content(message.content), # type: ignore[arg-type] # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
tool_calls=_convert_openai_tool_calls(message.tool_calls) if message.tool_calls else [], # type: ignore[arg-type] # OpenAI tool_calls type incompatible with conversion function
stop_reason=StopReason.end_of_turn,
)
elif message.role == "tool":
converted_message = ToolResponseMessage(
role="tool",
call_id=message.tool_call_id,
content=openai_content_to_content(message.content),
content=openai_content_to_content(message.content), # type: ignore[arg-type] # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
)
else:
raise ValueError(f"Unknown role {message.role}")
@ -990,9 +996,9 @@ def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionConten
return [openai_content_to_content(c) for c in content]
elif hasattr(content, "type"):
if content.type == "text":
return TextContentItem(type="text", text=content.text)
return TextContentItem(type="text", text=content.text) # type: ignore[attr-defined] # Iterable narrowed by hasattr check but mypy doesn't track
elif content.type == "image_url":
return ImageContentItem(type="image", image=_URLOrData(url=URL(uri=content.image_url.url)))
return ImageContentItem(type="image", image=_URLOrData(url=URL(uri=content.image_url.url))) # type: ignore[attr-defined] # Iterable narrowed by hasattr check but mypy doesn't track
else:
raise ValueError(f"Unknown content type: {content.type}")
else:
@ -1041,9 +1047,9 @@ def convert_openai_chat_completion_choice(
completion_message=CompletionMessage(
content=choice.message.content or "", # CompletionMessage content is not optional
stop_reason=_convert_openai_finish_reason(choice.finish_reason),
tool_calls=_convert_openai_tool_calls(choice.message.tool_calls),
tool_calls=_convert_openai_tool_calls(choice.message.tool_calls) if choice.message.tool_calls else [], # type: ignore[arg-type] # OpenAI tool_calls Optional type broadens union
),
logprobs=_convert_openai_logprobs(getattr(choice, "logprobs", None)),
logprobs=_convert_openai_logprobs(getattr(choice, "logprobs", None)), # type: ignore[arg-type] # getattr returns Any, can't narrow without inspection
)
@ -1070,7 +1076,7 @@ async def convert_openai_chat_completion_stream(
choice = chunk.choices[0] # assuming only one choice per chunk
# we assume there's only one finish_reason in the stream
stop_reason = _convert_openai_finish_reason(choice.finish_reason) or stop_reason
stop_reason = _convert_openai_finish_reason(choice.finish_reason) if choice.finish_reason else stop_reason
logprobs = getattr(choice, "logprobs", None)
# if there's a tool call, emit an event for each tool in the list
@ -1083,7 +1089,7 @@ async def convert_openai_chat_completion_stream(
event=ChatCompletionResponseEvent(
event_type=event_type,
delta=TextDelta(text=choice.delta.content),
logprobs=_convert_openai_logprobs(logprobs),
logprobs=_convert_openai_logprobs(logprobs), # type: ignore[arg-type] # logprobs type broadened from getattr result
)
)
@ -1101,10 +1107,10 @@ async def convert_openai_chat_completion_stream(
event=ChatCompletionResponseEvent(
event_type=event_type,
delta=ToolCallDelta(
tool_call=_convert_openai_tool_calls([tool_call])[0],
tool_call=_convert_openai_tool_calls([tool_call])[0], # type: ignore[arg-type, list-item] # delta tool_call type differs from complete tool_call
parse_status=ToolCallParseStatus.succeeded,
),
logprobs=_convert_openai_logprobs(logprobs),
logprobs=_convert_openai_logprobs(logprobs), # type: ignore[arg-type] # logprobs type broadened from getattr result
)
)
else:
@ -1125,11 +1131,14 @@ async def convert_openai_chat_completion_stream(
if tool_call.function.name:
buffer["name"] = tool_call.function.name
delta = f"{buffer['name']}("
if buffer["content"] is not None:
buffer["content"] += delta
if tool_call.function.arguments:
delta = tool_call.function.arguments
if buffer["arguments"] is not None and delta:
buffer["arguments"] += delta
if buffer["content"] is not None and delta:
buffer["content"] += delta
yield ChatCompletionResponseStreamChunk(
@ -1139,7 +1148,7 @@ async def convert_openai_chat_completion_stream(
tool_call=delta,
parse_status=ToolCallParseStatus.in_progress,
),
logprobs=_convert_openai_logprobs(logprobs),
logprobs=_convert_openai_logprobs(logprobs), # type: ignore[arg-type] # logprobs type broadened from getattr result
)
)
elif choice.delta.content:
@ -1147,7 +1156,7 @@ async def convert_openai_chat_completion_stream(
event=ChatCompletionResponseEvent(
event_type=event_type,
delta=TextDelta(text=choice.delta.content or ""),
logprobs=_convert_openai_logprobs(logprobs),
logprobs=_convert_openai_logprobs(logprobs), # type: ignore[arg-type] # logprobs type broadened from getattr result
)
)
@ -1155,6 +1164,7 @@ async def convert_openai_chat_completion_stream(
logger.debug(f"toolcall_buffer[{idx}]: {buffer}")
if buffer["name"]:
delta = ")"
if buffer["content"] is not None:
buffer["content"] += delta
yield ChatCompletionResponseStreamChunk(
event=ChatCompletionResponseEvent(
@ -1168,16 +1178,16 @@ async def convert_openai_chat_completion_stream(
)
try:
tool_call = ToolCall(
call_id=buffer["call_id"],
tool_name=buffer["name"],
arguments=buffer["arguments"],
parsed_tool_call = ToolCall(
call_id=buffer["call_id"] or "",
tool_name=buffer["name"] or "",
arguments=buffer["arguments"] or "",
)
yield ChatCompletionResponseStreamChunk(
event=ChatCompletionResponseEvent(
event_type=ChatCompletionResponseEventType.progress,
delta=ToolCallDelta(
tool_call=tool_call,
tool_call=parsed_tool_call, # type: ignore[arg-type] # ToolCallDelta.tool_call accepts Union[str, ToolCall]
parse_status=ToolCallParseStatus.succeeded,
),
stop_reason=stop_reason,
@ -1189,7 +1199,7 @@ async def convert_openai_chat_completion_stream(
event=ChatCompletionResponseEvent(
event_type=ChatCompletionResponseEventType.progress,
delta=ToolCallDelta(
tool_call=buffer["content"],
tool_call=buffer["content"], # type: ignore[arg-type] # ToolCallDelta.tool_call accepts Union[str, ToolCall]
parse_status=ToolCallParseStatus.failed,
),
stop_reason=stop_reason,
@ -1250,7 +1260,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
top_p: float | None = None,
user: str | None = None,
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
messages = openai_messages_to_messages(messages)
messages = openai_messages_to_messages(messages) # type: ignore[assignment] # converted from OpenAI to LlamaStack message format
response_format = _convert_openai_request_response_format(response_format)
sampling_params = _convert_openai_sampling_params(
max_tokens=max_tokens,
@ -1259,15 +1269,15 @@ class OpenAIChatCompletionToLlamaStackMixin:
)
tool_config = _convert_openai_request_tool_config(tool_choice)
tools = _convert_openai_request_tools(tools)
tools = _convert_openai_request_tools(tools) # type: ignore[assignment] # converted from OpenAI to LlamaStack tool format
if tool_config.tool_choice == ToolChoice.none:
tools = []
tools = [] # type: ignore[assignment] # empty list narrows return type but mypy tracks broader type
outstanding_responses = []
# "n" is the number of completions to generate per prompt
n = n or 1
for _i in range(0, n):
response = self.chat_completion(
response = self.chat_completion( # type: ignore[attr-defined] # mixin expects class to implement chat_completion
model_id=model,
messages=messages,
sampling_params=sampling_params,
@ -1279,7 +1289,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
outstanding_responses.append(response)
if stream:
return OpenAIChatCompletionToLlamaStackMixin._process_stream_response(self, model, outstanding_responses)
return OpenAIChatCompletionToLlamaStackMixin._process_stream_response(self, model, outstanding_responses) # type: ignore[no-any-return] # mixin async generator return type too complex for mypy
return await OpenAIChatCompletionToLlamaStackMixin._process_non_stream_response(
self, model, outstanding_responses
@ -1295,14 +1305,16 @@ class OpenAIChatCompletionToLlamaStackMixin:
response = await outstanding_response
async for chunk in response:
event = chunk.event
finish_reason = _convert_stop_reason_to_openai_finish_reason(event.stop_reason)
finish_reason = (
_convert_stop_reason_to_openai_finish_reason(event.stop_reason) if event.stop_reason else None
)
if isinstance(event.delta, TextDelta):
text_delta = event.delta.text
delta = OpenAIChoiceDelta(content=text_delta)
yield OpenAIChatCompletionChunk(
id=id,
choices=[OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)],
choices=[OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)], # type: ignore[arg-type] # finish_reason Optional[str] incompatible with Literal union
created=int(time.time()),
model=model,
object="chat.completion.chunk",
@ -1310,13 +1322,17 @@ class OpenAIChatCompletionToLlamaStackMixin:
elif isinstance(event.delta, ToolCallDelta):
if event.delta.parse_status == ToolCallParseStatus.succeeded:
tool_call = event.delta.tool_call
if isinstance(tool_call, str):
continue
# First chunk includes full structure
openai_tool_call = OpenAIChoiceDeltaToolCall(
index=0,
id=tool_call.call_id,
function=OpenAIChoiceDeltaToolCallFunction(
name=tool_call.tool_name,
name=tool_call.tool_name
if isinstance(tool_call.tool_name, str)
else tool_call.tool_name.value, # type: ignore[arg-type] # enum .value extraction on Union confuses mypy
arguments="",
),
)
@ -1324,7 +1340,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
yield OpenAIChatCompletionChunk(
id=id,
choices=[
OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)
OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) # type: ignore[arg-type] # finish_reason Optional[str] incompatible with Literal union
],
created=int(time.time()),
model=model,
@ -1341,7 +1357,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
yield OpenAIChatCompletionChunk(
id=id,
choices=[
OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)
OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) # type: ignore[arg-type] # finish_reason Optional[str] incompatible with Literal union
],
created=int(time.time()),
model=model,
@ -1351,7 +1367,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
async def _process_non_stream_response(
self, model: str, outstanding_responses: list[Awaitable[ChatCompletionResponse]]
) -> OpenAIChatCompletion:
choices = []
choices: list[OpenAIChatCompletionChoice] = []
for outstanding_response in outstanding_responses:
response = await outstanding_response
completion_message = response.completion_message
@ -1360,14 +1376,14 @@ class OpenAIChatCompletionToLlamaStackMixin:
choice = OpenAIChatCompletionChoice(
index=len(choices),
message=message,
message=message, # type: ignore[arg-type] # OpenAIChatCompletionMessage union incompatible with narrower Message type
finish_reason=finish_reason,
)
choices.append(choice)
choices.append(choice) # type: ignore[arg-type] # OpenAIChatCompletionChoice type annotation mismatch
return OpenAIChatCompletion(
id=f"chatcmpl-{uuid.uuid4()}",
choices=choices,
choices=choices, # type: ignore[arg-type] # list[OpenAIChatCompletionChoice] union incompatible
created=int(time.time()),
model=model,
object="chat.completion",

View file

@ -196,6 +196,7 @@ def make_overlapped_chunks(
chunks.append(
Chunk(
content=chunk,
chunk_id=chunk_id,
metadata=chunk_metadata,
chunk_metadata=backend_chunk_metadata,
)

View file

@ -430,6 +430,32 @@ def _unwrap_generic_list(typ: type[list[T]]) -> type[T]:
return list_type # type: ignore[no-any-return]
def is_generic_sequence(typ: object) -> bool:
"True if the specified type is a generic Sequence, i.e. `Sequence[T]`."
import collections.abc
typ = unwrap_annotated_type(typ)
return typing.get_origin(typ) is collections.abc.Sequence
def unwrap_generic_sequence(typ: object) -> type:
"""
Extracts the item type of a Sequence type.
:param typ: The Sequence type `Sequence[T]`.
:returns: The item type `T`.
"""
return rewrap_annotated_type(_unwrap_generic_sequence, typ) # type: ignore[arg-type]
def _unwrap_generic_sequence(typ: object) -> type:
"Extracts the item type of a Sequence type (e.g. returns `T` for `Sequence[T]`)."
(sequence_type,) = typing.get_args(typ) # unpack single tuple element
return sequence_type # type: ignore[no-any-return]
def is_generic_set(typ: object) -> TypeGuard[type[set]]:
"True if the specified type is a generic set, i.e. `Set[T]`."

View file

@ -18,10 +18,12 @@ from .inspection import (
TypeLike,
is_generic_dict,
is_generic_list,
is_generic_sequence,
is_type_optional,
is_type_union,
unwrap_generic_dict,
unwrap_generic_list,
unwrap_generic_sequence,
unwrap_optional_type,
unwrap_union_types,
)
@ -155,24 +157,28 @@ def python_type_to_name(data_type: TypeLike, force: bool = False) -> str:
if metadata is not None:
# type is Annotated[T, ...]
arg = typing.get_args(data_type)[0]
return python_type_to_name(arg)
return python_type_to_name(arg, force=force)
if force:
# generic types
if is_type_optional(data_type, strict=True):
inner_name = python_type_to_name(unwrap_optional_type(data_type))
inner_name = python_type_to_name(unwrap_optional_type(data_type), force=True)
return f"Optional__{inner_name}"
elif is_generic_list(data_type):
item_name = python_type_to_name(unwrap_generic_list(data_type))
item_name = python_type_to_name(unwrap_generic_list(data_type), force=True)
return f"List__{item_name}"
elif is_generic_sequence(data_type):
# Treat Sequence the same as List for schema generation purposes
item_name = python_type_to_name(unwrap_generic_sequence(data_type), force=True)
return f"List__{item_name}"
elif is_generic_dict(data_type):
key_type, value_type = unwrap_generic_dict(data_type)
key_name = python_type_to_name(key_type)
value_name = python_type_to_name(value_type)
key_name = python_type_to_name(key_type, force=True)
value_name = python_type_to_name(value_type, force=True)
return f"Dict__{key_name}__{value_name}"
elif is_type_union(data_type):
member_types = unwrap_union_types(data_type)
member_names = "__".join(python_type_to_name(member_type) for member_type in member_types)
member_names = "__".join(python_type_to_name(member_type, force=True) for member_type in member_types)
return f"Union__{member_names}"
# named system or user-defined type
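Assumed outputs of the forced naming after the change above (illustrative only):

    python_type_to_name(Sequence[int], force=True)         # expected -> "List__int", same as list[int]
    python_type_to_name(Sequence[int] | None, force=True)  # expected -> "Optional__List__int"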

View file

@ -111,7 +111,7 @@ def get_class_property_docstrings(
def docstring_to_schema(data_type: type) -> Schema:
short_description, long_description = get_class_docstrings(data_type)
schema: Schema = {
"title": python_type_to_name(data_type),
"title": python_type_to_name(data_type, force=True),
}
description = "\n".join(filter(None, [short_description, long_description]))
@ -417,6 +417,10 @@ class JsonSchemaGenerator:
if origin_type is list:
(list_type,) = typing.get_args(typ) # unpack single tuple element
return {"type": "array", "items": self.type_to_schema(list_type)}
elif origin_type is collections.abc.Sequence:
# Treat Sequence the same as list for JSON schema (both are arrays)
(sequence_type,) = typing.get_args(typ) # unpack single tuple element
return {"type": "array", "items": self.type_to_schema(sequence_type)}
elif origin_type is dict:
key_type, value_type = typing.get_args(typ)
if not (key_type is str or key_type is int or is_type_enum(key_type)):
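With the Sequence branch added above, a Sequence-typed field should serialize the same way a list does; a hedged example of the expected schema (generator construction details omitted):

    # given a JsonSchemaGenerator instance `gen`
    assert gen.type_to_schema(Sequence[str]) == {"type": "array", "items": {"type": "string"}}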

View file

@ -39,7 +39,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
if provider.provider_type in (
"inline::meta-reference",
"inline::sentence-transformers",
"inline::vllm",
"remote::vllm",
"remote::bedrock",
"remote::databricks",
# Technically Nvidia does support OpenAI completions, but none of their hosted models
@ -120,7 +120,7 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
if provider.provider_type in (
"inline::meta-reference",
"inline::sentence-transformers",
"inline::vllm",
"remote::vllm",
"remote::bedrock",
"remote::databricks",
"remote::cerebras",

View file

@ -0,0 +1,763 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_UKFNZA0eSkL6fZHbs8ygBd5W",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_UKFNZA0eSkL6fZHbs8ygBd5W",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-861837565219, score: 0.015252742239920682, attributes: {'filename': 'test_response_non_streaming_file_search.txt', 'chunk_id': '869ae0c0-ab85-ca6f-e5d0-024381443c27', 'document_id': 'file-861837565219', 'token_count': 10.0, 'metadata_token_count': 13.0} (cite as <|file-861837565219|>)\nLlama 4 Maverick has 128 experts\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "OEZj77MujzEilF"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "aZ37vwWHFrpGy"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "csghpwq82thpEG"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "1dRxATyjFkzZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "DkAEGxNVXrhL9KJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "SI7v0ofTi6JL0LP"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "tThgm0YItJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "5UnIV9ZM2koPE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "pFPs5HfBSA"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CIT42IHpAEgx"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "jpXixTaXlYSxTu3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "IBEKia6bwNtLB"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "hHMPPr4Q"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "iGTIWlxj9c2Equ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "wfQImUZLNC8Dtgc"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "m21wFuqSLpMN"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CP5N1QxHqEnzbnq"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "861",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "jgQZ9egEpAiQv"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "837",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "viNedPoe13lJJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "565",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "j2gGBSzOagN98"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "219",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "d4iMNITon2xM3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "67lYY4LnZsfKd3U"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "bMllpJPicr01Ip"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "ZgWEFMbo3w"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 23,
"prompt_tokens": 352,
"total_tokens": 375,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "Wwt10anxWJDla"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,767 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts_pdf]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_M8gyYiB39MwYdJKc4aHIGbfA",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_M8gyYiB39MwYdJKc4aHIGbfA",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 2 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-379221123213, score: 0.008294223715346738, attributes: {'filename': 'llama_stack_and_models.pdf', 'chunk_id': 'c3556aea-3b73-0278-aa16-ebbdb4c18b18', 'document_id': 'file-379221123213', 'token_count': 98.0, 'metadata_token_count': 11.0} (cite as <|file-379221123213|>)\n, \nhardware\n \nvendors,\n \nand\n \nAI-focused\n \ncompanies)\n \nthat\n \noffer\n \ntailored\n \ninfrastructure,\n \nsoftware,\n \nand\n \nservices\n \nfor\n \ndeploying\n \nLlama\n \nmodels.\n \nLlama 4 Maverick \n Llama 4 Maverick is a Mixture-of-Experts (MoE) model with 17 billion active parameters and 128 experts. \n"
},
{
"type": "text",
"text": "[2] document_id: file-379221123213, score: 0.0033899213359898477, attributes: {'filename': 'llama_stack_and_models.pdf', 'chunk_id': '16d99c69-8323-27ce-3bd7-7b51dcac2735', 'document_id': 'file-379221123213', 'token_count': 498.0, 'metadata_token_count': 11.0} (cite as <|file-379221123213|>)\nLlama Stack \nLlama Stack Overview \nLlama Stack standardizes the core building blocks that simplify AI application development. It codifies best \npractices\n \nacross\n \nthe\n \nLlama\n \necosystem.\n \nMore\n \nspecifically,\n \nit\n \nprovides\n \u25cf Unified API layer for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry. \u25cf Plugin architecture to support the rich ecosystem of different API implementations in various \nenvironments,\n \nincluding\n \nlocal\n \ndevelopment,\n \non-premises,\n \ncloud,\n \nand\n \nmobile.\n \u25cf Prepackaged verified distributions which offer a one-stop solution for developers to get started quickly \nand\n \nreliably\n \nin\n \nany\n \nenvironment.\n \u25cf Multiple developer interfaces like CLI and SDKs for Python, Typescript, iOS, and Android. \u25cf Standalone applications as examples for how to build production-grade AI applications with Llama \nStack.\n \nLlama Stack Benefits \n\u25cf Flexible Options: Developers can choose their preferred infrastructure without changing APIs and enjoy \nflexible\n \ndeployment\n \nchoices.\n \u25cf Consistent Experience: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI \napplications\n \nwith\n \nconsistent\n \napplication\n \nbehavior.\n \u25cf Robust Ecosystem: Llama Stack is already integrated with distribution partners (cloud providers, \nhardware\n \nvendors,\n \nand\n \nAI-focused\n \ncompanies)\n \nthat\n \noffer\n \ntailored\n \ninfrastructure,\n \nsoftware,\n \nand\n \nservices\n \nfor\n \ndeploying\n \nLlama\n \nmodels.\n \nLlama 4 Maverick \n Llama 4 Maverick is a Mixture-of-Experts (MoE) model with 17 billion active parameters and 128 experts. \n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "SH6nRcfXzd8qPg"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "vbJu1mhpQKtNr"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "uAUiYAVpMW8Ph9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "DJxjs1HFugOD"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "sU2IncrauGmuYki"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "IkZbrWS45cqkmqi"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "YbZYhGgoGE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "7FtHnapGtkc09"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "8P3mUr7HfV"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "WxYXJUfkyxqZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "E9hIXNC7oeJcZ8v"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "L9ww7cI1pSSt3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "hHao5x7a"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "0cwygEJttBgv7M"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "KYVCnE5AA6MnQ0Y"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "N3DcYBcrQDzD"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CUpjI7Qo17k4aeo"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "379",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "s1694CAHwowUf"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "221",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "I94vCKkpQNsx6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "123",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "RNfAfPtJK3KHE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "213",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Gk04vo9RXpl3P"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "rkWPIUdNABAeP7V"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "GIF1vPXxInWrhl"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Oa1imYdRme"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 23,
"prompt_tokens": 1048,
"total_tokens": 1071,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "0Xx3txQF13S"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,925 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_FzhOmTdZThRndI5rSASPdAqr",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_FzhOmTdZThRndI5rSASPdAqr",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-797509666839, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-797509666839', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-797509666839|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Lk9Xf7hCFPS2tT"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "14pQ6XFvX7eSh"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "gPEg73EpAxR3FC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "ZWJl6Mzcv95d"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "zEYaSNtwtGmhfwy"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "2tesGAvAkEOb8T6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Hykn5kSQlG"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "xWW13SGjSybVX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "fAZjisJ63a"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "FlTpZNfFG6rX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "9J9VrtXuLHug6II"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "0EckZGr823mA9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "dW7O5HFR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " in",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "5dRdaDvaXumkV"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " its",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "kD1aZsGwZhMx"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " mixture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "IpxDJF0p"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "WbnOG310xKaLq"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "sh58U2d8"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " architecture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "El3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "u3EtYZFJGaheZj"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "QjdqqIuk8c7wMUp"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Zqcwf53n0hUw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "DfFLPM5V45QUiAm"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "797",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "55snCUEJgoLyX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "509",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "pCqEKhy1wq8Vl"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "666",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "c5QnCsKzuhFd0"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "839",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "jFSbryUeH7ZyA"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "uHktQBYsC92laeK"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "UUxHP1QGdz8MdR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "uExxZzWuXd"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 29,
"prompt_tokens": 359,
"total_tokens": 388,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "EjpA6XzHVgcj8"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,631 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_FzhOmTdZThRndI5rSASPdAqr",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_FzhOmTdZThRndI5rSASPdAqr",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-797509666839, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-797509666839', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-797509666839|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
},
{
"role": "assistant",
"content": "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture <|file-797509666839|>."
},
{
"role": "user",
"content": "Can you tell me more about the architecture?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_y4Py1L2VscRQ5IBZ7gGpqpWv",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "iFdF"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "gIC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "P"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "p"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "L",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "TAVud"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "lama",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " ",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "hHmE5"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "4",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CN4uS"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " Maver",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ick",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "0kI"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " model",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " architecture",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "dyryTBF49"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "BHV"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "qrKh"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 22,
"prompt_tokens": 404,
"total_tokens": 426,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "ecpBTD3qjc75r"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,763 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_gZXRKN1HMDC16NP9wNPAkP9K",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model experts count\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_gZXRKN1HMDC16NP9wNPAkP9K",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-864460993305, score: 0.011418752464355166, attributes: {'filename': 'test_response_non_streaming_file_search.txt', 'chunk_id': '869ae0c0-ab85-ca6f-e5d0-024381443c27', 'document_id': 'file-864460993305', 'token_count': 10.0, 'metadata_token_count': 13.0} (cite as <|file-864460993305|>)\nLlama 4 Maverick has 128 experts\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model experts count\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "VvS2zeV5Z8apdX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "NeElmbFuPxg9F"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "RA2Dv6fH3Xp28d"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "mk2wpBSl9esL"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "WkghQrNy7WNFz7S"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "LOo1ya1Av8yejuX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Uj02OVTEBb"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "7s3FiwwwgzGhy"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "WExrPT6Yjd"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "vbf0YwoBbJsB"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "vYIgV2n0AuxwZ9F"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "lAS4gXrK4sNoq"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "90lGUcaB"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "mnFZfKgXWsjWZe"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "eOcwjhvK0vIp2nj"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "5TijFZHKoeGs"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "MWGjx7wiu4tdFha"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "864",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "k9VH32AhyY519"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "460",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "dWxZtp4i8KhxZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "993",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "u2WHjDkGJE2hg"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "305",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "6fckZytfB9iS5"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "YGOP75uha3KyHao"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "emmym2mGHhvw9Q"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "GoEMFfNFBW"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 23,
"prompt_tokens": 350,
"total_tokens": 373,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "ec6S325i8izl1"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,631 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-528246887823, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-528246887823', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-528246887823|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
},
{
"role": "assistant",
"content": "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture <|file-528246887823|>."
},
{
"role": "user",
"content": "Can you tell me more about the architecture?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_2dn6pQIic4tAhxL0Q3R9v9oy",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "U5u2"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "rC6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "4"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "E"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "L",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "U1RKZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "lama",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "N9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " ",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "eCM84"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "4",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "RNtZo"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " Maver",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ick",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "OmQ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " model",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " architecture",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Hd8hPZl2u"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "5bs"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "eMIj"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 22,
"prompt_tokens": 404,
"total_tokens": 426,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "ofat2LchRvz8V"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,925 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-528246887823, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-528246887823', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-528246887823|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "DzrEfuLOuw4cnb"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CsVsWYnTMLfCu"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "45hLla9Dhdu3x9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "AhCUnf7tqKqC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "gvAEwnHAgMzITVb"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "mGUFWICkd1S0jlx"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "e85JCyNVPe"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "5vQf0h4IJTGGt"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "anovsNqaSC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "fS6GYg8pBO8Q"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "vO7onsnvWf5kjUI"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "pdFjXciA0pN5w"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "eMMaKcAW"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " in",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "JFDRUy7B9ktO0"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " its",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "QlQIiohVPMVQ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " mixture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "UuR2QmMR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "t0uvHdtkB4Fsl"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "3G1KX2gw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " architecture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "x2J"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "fbLYZDlS7xvywf"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "vAxoGpf245DPeM8"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "gLu1ZShAlH4C"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "PdMvc8X2LtbhyFU"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "528",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "0S00nwBZD0Cah"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "246",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "fa7s8AYzHjMph"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "887",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "hrwMBgH8bsKYT"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "823",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "NBJ8yJWJjBCCQ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "AAzbONdy9ExzSBR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "THiCsk4cqjABWJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "rzm64SnHTE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 29,
"prompt_tokens": 359,
"total_tokens": 388,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "AnUv1BxAB2uOY"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,952 @@
{
"test_id": "tests/integration/responses/test_file_search.py::test_response_file_search_filter_compound_and[client_with_models-txt=openai/gpt-4o]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "What are the engineering updates from the US?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_rST37XuKuJQcEBfmoTnNQzNe",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"engineering updates from the US\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_rST37XuKuJQcEBfmoTnNQzNe",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-710505118847, score: 0.005345607610573921, attributes: {'region': 'us', 'category': 'engineering', 'date': 1680307200.0, 'filename': 'us_engineering_q2.txt', 'chunk_id': '084e15ad-480a-eae8-9242-391c53854867', 'document_id': 'file-710505118847', 'token_count': 18.0, 'metadata_token_count': 32.0} (cite as <|file-710505118847|>)\nUS technical updates for Q2 2023. New features deployed in the US region.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"engineering updates from the US\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CVT4TMzBPNlTqA"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Rlj8tcP3E7bOB"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " engineering",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "8lga"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " updates",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "6fwO0WkR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " from",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "BryajibrQvv"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "iTlMgikEguMP"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " US",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "79xbcCa6na7en"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " include",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "q7q4AkjT"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " new",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "fiyvaDyv5eet"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " features",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "cBkhZfR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " deployed",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "EaW5Ixt"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " in",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "xLVfGMTiR4OMS"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "cncqZQApoIjH"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " region",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "yiSqVtnqF"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " for",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "sbDWGbV8OoYi"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " Q",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "E1ZJCGd5c2IH7b"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "2",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "agHXieAbH98A2VE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Ht3DkQwQs7t32Aw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "202",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "j4r88Vvqcm7VY"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "3",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "pv9GLKOSpa0BHEr"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "iBXT8JWz9X1J1q"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "D1gi2w0f0DN5n3k"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "zxHM3I5wmPGU"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Gl7oL62eU6xIrUp"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "710",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "l4RX4sx1BfQA6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "505",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "AGyEWqU2sDL6e"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "118",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "BReQxn8kTEiA5"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "847",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "yN9PEtunpAkNv"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "bKBLmRBkxlk61fP"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "077BDwQit7hWfz"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "LOYztD3Yfb"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 30,
"prompt_tokens": 364,
"total_tokens": 394,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "9lHtlsx9YsVH6"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -82,23 +82,37 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode
@pytest.fixture(scope="session")
def sample_chunks():
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
chunks_data = [
(
"Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
"doc1",
"programming",
),
(
"Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
"doc2",
"ai",
),
(
"Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
"doc3",
"computer_science",
),
(
"Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
"doc4",
"ai",
),
]
return [
Chunk(
content="Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
metadata={"document_id": "doc1", "topic": "programming"},
),
Chunk(
content="Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
metadata={"document_id": "doc2", "topic": "ai"},
),
Chunk(
content="Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
metadata={"document_id": "doc3", "topic": "computer_science"},
),
Chunk(
content="Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
metadata={"document_id": "doc4", "topic": "ai"},
),
content=content,
chunk_id=generate_chunk_id(doc_id, content),
metadata={"document_id": doc_id, "topic": topic},
)
for content, doc_id, topic in chunks_data
]

View file

@ -13,23 +13,33 @@ from ..conftest import vector_provider_wrapper
@pytest.fixture(scope="session")
def sample_chunks():
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
chunks_data = [
(
"Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
"doc1",
),
(
"Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
"doc2",
),
(
"Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
"doc3",
),
(
"Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
"doc4",
),
]
return [
Chunk(
content="Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
metadata={"document_id": "doc1"},
),
Chunk(
content="Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
metadata={"document_id": "doc2"},
),
Chunk(
content="Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
metadata={"document_id": "doc3"},
),
Chunk(
content="Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
metadata={"document_id": "doc4"},
),
content=content,
chunk_id=generate_chunk_id(doc_id, content),
metadata={"document_id": doc_id},
)
for content, doc_id in chunks_data
]
@ -168,6 +178,7 @@ def test_insert_chunks_with_precomputed_embeddings(
chunks_with_embeddings = [
Chunk(
content="This is a test chunk with precomputed embedding.",
chunk_id="chunk1",
metadata={"document_id": "doc1", "source": "precomputed", "chunk_id": "chunk1"},
embedding=[0.1] * int(embedding_dimension),
),
@ -215,9 +226,12 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
actual_vector_store_id = register_response.id
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
chunks_with_embeddings = [
Chunk(
content="duplicate",
chunk_id=generate_chunk_id("doc1", "duplicate"),
metadata={"document_id": "doc1", "source": "precomputed"},
embedding=[0.1] * int(embedding_dimension),
),

View file

@ -192,18 +192,18 @@ async def test_create_agent_session_persistence(agents_impl, sample_agent_config
assert session_response.session_id is not None
# Verify the session was stored
session = await agents_impl.get_agents_session(agent_id, session_response.session_id)
session = await agents_impl.get_agents_session(session_response.session_id, agent_id)
assert session.session_name == "test_session"
assert session.session_id == session_response.session_id
assert session.started_at is not None
assert session.turns == []
# Delete the session
await agents_impl.delete_agents_session(agent_id, session_response.session_id)
await agents_impl.delete_agents_session(session_response.session_id, agent_id)
# Verify the session was deleted
with pytest.raises(ValueError):
await agents_impl.get_agents_session(agent_id, session_response.session_id)
await agents_impl.get_agents_session(session_response.session_id, agent_id)
@pytest.mark.parametrize("enable_session_persistence", [True, False])
@ -226,11 +226,11 @@ async def test_list_agent_sessions_persistence(agents_impl, sample_agent_config,
assert session2.session_id in session_ids
# Delete one session
await agents_impl.delete_agents_session(agent_id, session1.session_id)
await agents_impl.delete_agents_session(session1.session_id, agent_id)
# Verify the session was deleted
with pytest.raises(ValueError):
await agents_impl.get_agents_session(agent_id, session1.session_id)
await agents_impl.get_agents_session(session1.session_id, agent_id)
# List sessions again
sessions = await agents_impl.list_agent_sessions(agent_id)

View file

@ -43,9 +43,15 @@ def embedding_dimension() -> int:
@pytest.fixture(scope="session")
def sample_chunks():
"""Generates chunks that force multiple batches for a single document to expose ID conflicts."""
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
n, k = 10, 3
sample = [
Chunk(content=f"Sentence {i} from document {j}", metadata={"document_id": f"document-{j}"})
Chunk(
content=f"Sentence {i} from document {j}",
chunk_id=generate_chunk_id(f"document-{j}", f"Sentence {i} from document {j}"),
metadata={"document_id": f"document-{j}"},
)
for j in range(k)
for i in range(n)
]
@ -53,6 +59,7 @@ def sample_chunks():
[
Chunk(
content=f"Sentence {i} from document {j + k}",
chunk_id=f"document-{j}-chunk-{i}",
chunk_metadata=ChunkMetadata(
document_id=f"document-{j + k}",
chunk_id=f"document-{j}-chunk-{i}",
@ -73,6 +80,7 @@ def sample_chunks_with_metadata():
sample = [
Chunk(
content=f"Sentence {i} from document {j}",
chunk_id=f"document-{j}-chunk-{i}",
metadata={"document_id": f"document-{j}"},
chunk_metadata=ChunkMetadata(
document_id=f"document-{j}",

View file

@ -49,9 +49,21 @@ def vector_store_id():
@pytest.fixture
def sample_chunks():
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
return [
Chunk(content="MOCK text content 1", mime_type="text/plain", metadata={"document_id": "mock-doc-1"}),
Chunk(content="MOCK text content 1", mime_type="text/plain", metadata={"document_id": "mock-doc-2"}),
Chunk(
content="MOCK text content 1",
chunk_id=generate_chunk_id("mock-doc-1", "MOCK text content 1"),
mime_type="text/plain",
metadata={"document_id": "mock-doc-1"},
),
Chunk(
content="MOCK text content 1",
chunk_id=generate_chunk_id("mock-doc-2", "MOCK text content 1"),
mime_type="text/plain",
metadata={"document_id": "mock-doc-2"},
),
]

View file

@ -434,9 +434,15 @@ async def test_query_chunks_hybrid_tie_breaking(
sqlite_vec_index, sample_embeddings, embedding_dimension, tmp_path_factory
):
"""Test tie-breaking and determinism when scores are equal."""
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
# Create two chunks with the same content and embedding
chunk1 = Chunk(content="identical", metadata={"document_id": "docA"})
chunk2 = Chunk(content="identical", metadata={"document_id": "docB"})
chunk1 = Chunk(
content="identical", chunk_id=generate_chunk_id("docA", "identical"), metadata={"document_id": "docA"}
)
chunk2 = Chunk(
content="identical", chunk_id=generate_chunk_id("docB", "identical"), metadata={"document_id": "docB"}
)
chunks = [chunk1, chunk2]
# Use the same embedding for both chunks to ensure equal scores
same_embedding = sample_embeddings[0]

View file

@ -135,10 +135,24 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
vector_io_adapter.cache["db1"] = fake_index
# Various document_id scenarios that shouldn't crash
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
chunks = [
Chunk(content="has doc_id in metadata", metadata={"document_id": "doc-1"}),
Chunk(content="no doc_id anywhere", metadata={"source": "test"}),
Chunk(content="doc_id in chunk_metadata", chunk_metadata=ChunkMetadata(document_id="doc-3")),
Chunk(
content="has doc_id in metadata",
chunk_id=generate_chunk_id("doc-1", "has doc_id in metadata"),
metadata={"document_id": "doc-1"},
),
Chunk(
content="no doc_id anywhere",
chunk_id=generate_chunk_id("unknown", "no doc_id anywhere"),
metadata={"source": "test"},
),
Chunk(
content="doc_id in chunk_metadata",
chunk_id=generate_chunk_id("doc-3", "doc_id in chunk_metadata"),
chunk_metadata=ChunkMetadata(document_id="doc-3"),
),
]
# Should work without KeyError
@ -151,7 +165,9 @@ async def test_document_id_with_invalid_type_raises_error():
from llama_stack.apis.vector_io import Chunk
# Integer document_id should raise TypeError
chunk = Chunk(content="test", metadata={"document_id": 12345})
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
chunk = Chunk(content="test", chunk_id=generate_chunk_id("test", "test"), metadata={"document_id": 12345})
with pytest.raises(TypeError) as exc_info:
_ = chunk.document_id
assert "metadata['document_id'] must be a string" in str(exc_info.value)
@ -159,7 +175,9 @@ async def test_document_id_with_invalid_type_raises_error():
async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter):
expected = QueryChunksResponse(chunks=[Chunk(content="c1")], scores=[0.1])
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
expected = QueryChunksResponse(chunks=[Chunk(content="c1", chunk_id=generate_chunk_id("test", "c1"))], scores=[0.1])
fake_index = AsyncMock(query_chunks=AsyncMock(return_value=expected))
vector_io_adapter.cache["db1"] = fake_index

View file

@ -18,13 +18,12 @@ from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
def test_generate_chunk_id():
chunks = [
Chunk(content="test", metadata={"document_id": "doc-1"}),
Chunk(content="test ", metadata={"document_id": "doc-1"}),
Chunk(content="test 3", metadata={"document_id": "doc-1"}),
]
"""Test that generate_chunk_id produces expected hashes."""
chunk_id1 = generate_chunk_id("doc-1", "test")
chunk_id2 = generate_chunk_id("doc-1", "test ")
chunk_id3 = generate_chunk_id("doc-1", "test 3")
chunk_ids = sorted([chunk.chunk_id for chunk in chunks])
chunk_ids = sorted([chunk_id1, chunk_id2, chunk_id3])
assert chunk_ids == [
"31d1f9a3-c8d2-66e7-3c37-af2acd329778",
"d07dade7-29c0-cda7-df29-0249a1dcbc3e",
@ -33,42 +32,49 @@ def test_generate_chunk_id():
def test_generate_chunk_id_with_window():
chunk = Chunk(content="test", metadata={"document_id": "doc-1"})
"""Test that generate_chunk_id with chunk_window produces different IDs."""
# Create a chunk object to match the original test behavior (passing object to generate_chunk_id)
chunk = Chunk(content="test", chunk_id="placeholder", metadata={"document_id": "doc-1"})
chunk_id1 = generate_chunk_id("doc-1", chunk, chunk_window="0-1")
chunk_id2 = generate_chunk_id("doc-1", chunk, chunk_window="1-2")
assert chunk_id1 == "8630321a-d9cb-2bb6-cd28-ebf68dafd866"
assert chunk_id2 == "13a1c09a-cbda-b61a-2d1a-7baa90888685"
# Verify that different windows produce different IDs
assert chunk_id1 != chunk_id2
assert len(chunk_id1) == 36 # Valid UUID format
assert len(chunk_id2) == 36 # Valid UUID format
def test_chunk_id():
# Test with existing chunk ID
chunk_with_id = Chunk(content="test", metadata={"document_id": "existing-id"})
assert chunk_with_id.chunk_id == "11704f92-42b6-61df-bf85-6473e7708fbd"
# Test with document ID in metadata
chunk_with_doc_id = Chunk(content="test", metadata={"document_id": "doc-1"})
assert chunk_with_doc_id.chunk_id == generate_chunk_id("doc-1", "test")
# Test chunks with ChunkMetadata
chunk_with_metadata = Chunk(
def test_chunk_creation_with_explicit_id():
"""Test that chunks can be created with explicit chunk_id."""
chunk_id = generate_chunk_id("doc-1", "test")
chunk = Chunk(
content="test",
metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"},
chunk_id=chunk_id,
metadata={"document_id": "doc-1"},
)
assert chunk.chunk_id == chunk_id
assert chunk.chunk_id == "31d1f9a3-c8d2-66e7-3c37-af2acd329778"
def test_chunk_with_metadata():
"""Test chunks with ChunkMetadata."""
chunk_id = "chunk-id-1"
chunk = Chunk(
content="test",
chunk_id=chunk_id,
metadata={"document_id": "existing-id"},
chunk_metadata=ChunkMetadata(document_id="document_1"),
)
assert chunk_with_metadata.chunk_id == "chunk-id-1"
# Test with no ID or document ID
chunk_without_id = Chunk(content="test")
generated_id = chunk_without_id.chunk_id
assert isinstance(generated_id, str) and len(generated_id) == 36 # Should be a valid UUID
assert chunk.chunk_id == "chunk-id-1"
assert chunk.document_id == "existing-id" # metadata takes precedence
def test_stored_chunk_id_alias():
# Test with existing chunk ID alias
chunk_with_alias = Chunk(content="test", metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"})
assert chunk_with_alias.chunk_id == "chunk-id-1"
serialized_chunk = chunk_with_alias.model_dump()
assert serialized_chunk["stored_chunk_id"] == "chunk-id-1"
# showing chunk_id is not serialized (i.e., a computed field)
assert "chunk_id" not in serialized_chunk
assert chunk_with_alias.stored_chunk_id == "chunk-id-1"
def test_chunk_serialization():
"""Test that chunk_id is properly serialized."""
chunk = Chunk(
content="test",
chunk_id="test-chunk-id",
metadata={"document_id": "doc-1"},
)
serialized_chunk = chunk.model_dump()
assert serialized_chunk["chunk_id"] == "test-chunk-id"
assert "chunk_id" in serialized_chunk

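# For reference, a minimal usage sketch of the pattern these tests exercise, assuming the
# imports and signatures shown in the diffs above: generate_chunk_id(document_id, content)
# returning a stable ID, and Chunk accepting an explicit chunk_id. This is an illustrative
# sketch, not part of the committed changes.
from llama_stack.apis.vector_io import Chunk
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id

content = "Python is a high-level programming language."
chunk_id = generate_chunk_id("doc-1", content)
assert chunk_id == generate_chunk_id("doc-1", content)  # same (document_id, content) -> same ID

chunk = Chunk(content=content, chunk_id=chunk_id, metadata={"document_id": "doc-1"})
assert chunk.chunk_id == chunk_id
assert chunk.model_dump()["chunk_id"] == chunk_id  # chunk_id is serialized directly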
View file

@ -41,6 +41,7 @@ class TestRagQuery:
interleaved_content = MagicMock()
chunk = Chunk(
content=interleaved_content,
chunk_id="chunk1",
metadata={
"key1": "value1",
"token_count": 10,
@ -48,7 +49,6 @@ class TestRagQuery:
# Note this is inserted into `metadata` during MemoryToolRuntimeImpl().insert()
"document_id": "doc1",
},
stored_chunk_id="chunk1",
chunk_metadata=chunk_metadata,
)
@ -101,8 +101,8 @@ class TestRagQuery:
)
chunk1 = Chunk(
content="chunk from db1",
chunk_id="c1",
metadata={"vector_store_id": "db1", "document_id": "doc1"},
stored_chunk_id="c1",
chunk_metadata=chunk_metadata1,
)
@ -114,8 +114,8 @@ class TestRagQuery:
)
chunk2 = Chunk(
content="chunk from db2",
chunk_id="c2",
metadata={"vector_store_id": "db2", "document_id": "doc2"},
stored_chunk_id="c2",
chunk_metadata=chunk_metadata2,
)

View file

@ -26,6 +26,7 @@ from llama_stack.providers.utils.memory.vector_store import (
content_from_doc,
make_overlapped_chunks,
)
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
DUMMY_PDF_PATH = Path(os.path.abspath(__file__)).parent / "fixtures" / "dummy.pdf"
# Depending on the machine, this can get parsed a couple of ways
@ -53,6 +54,7 @@ class TestChunk:
def test_chunk(self):
chunk = Chunk(
content="Example chunk content",
chunk_id=generate_chunk_id("test-doc", "Example chunk content"),
metadata={"key": "value"},
embedding=[0.1, 0.2, 0.3],
)
@ -63,6 +65,7 @@ class TestChunk:
chunk_no_embedding = Chunk(
content="Example chunk content",
chunk_id=generate_chunk_id("test-doc", "Example chunk content"),
metadata={"key": "value"},
)
assert chunk_no_embedding.embedding is None
@ -218,8 +221,8 @@ class TestVectorStoreWithIndex:
)
chunks = [
Chunk(content="Test 1", embedding=None, metadata={}),
Chunk(content="Test 2", embedding=None, metadata={}),
Chunk(content="Test 1", chunk_id=generate_chunk_id("test-doc", "Test 1"), embedding=None, metadata={}),
Chunk(content="Test 2", chunk_id=generate_chunk_id("test-doc", "Test 2"), embedding=None, metadata={}),
]
mock_inference_api.openai_embeddings.return_value.data = [
@ -254,8 +257,18 @@ class TestVectorStoreWithIndex:
)
chunks = [
Chunk(content="Test 1", embedding=[0.1, 0.2, 0.3], metadata={}),
Chunk(content="Test 2", embedding=[0.4, 0.5, 0.6], metadata={}),
Chunk(
content="Test 1",
chunk_id=generate_chunk_id("test-doc", "Test 1"),
embedding=[0.1, 0.2, 0.3],
metadata={},
),
Chunk(
content="Test 2",
chunk_id=generate_chunk_id("test-doc", "Test 2"),
embedding=[0.4, 0.5, 0.6],
metadata={},
),
]
await vector_store_with_index.insert_chunks(chunks)
@ -279,25 +292,47 @@ class TestVectorStoreWithIndex:
# Verify Chunk raises ValueError for invalid embedding type
with pytest.raises(ValueError, match="Input should be a valid list"):
Chunk(content="Test 1", embedding="invalid_type", metadata={})
Chunk(
content="Test 1",
chunk_id=generate_chunk_id("test-doc", "Test 1"),
embedding="invalid_type",
metadata={},
)
# Verify Chunk raises ValueError for invalid embedding type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
with pytest.raises(ValueError, match="Input should be a valid list"):
await vector_store_with_index.insert_chunks(
[
Chunk(content="Test 1", embedding=None, metadata={}),
Chunk(content="Test 2", embedding="invalid_type", metadata={}),
Chunk(
content="Test 1", chunk_id=generate_chunk_id("test-doc", "Test 1"), embedding=None, metadata={}
),
Chunk(
content="Test 2",
chunk_id=generate_chunk_id("test-doc", "Test 2"),
embedding="invalid_type",
metadata={},
),
]
)
# Verify Chunk raises ValueError for invalid embedding element type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
with pytest.raises(ValueError, match=" Input should be a valid number, unable to parse string as a number "):
await vector_store_with_index.insert_chunks(
Chunk(content="Test 1", embedding=[0.1, "string", 0.3], metadata={})
Chunk(
content="Test 1",
chunk_id=generate_chunk_id("test-doc", "Test 1"),
embedding=[0.1, "string", 0.3],
metadata={},
)
)
chunks_wrong_dim = [
Chunk(content="Test 1", embedding=[0.1, 0.2, 0.3, 0.4], metadata={}),
Chunk(
content="Test 1",
chunk_id=generate_chunk_id("test-doc", "Test 1"),
embedding=[0.1, 0.2, 0.3, 0.4],
metadata={},
),
]
with pytest.raises(ValueError, match="has dimension 4, expected 3"):
await vector_store_with_index.insert_chunks(chunks_wrong_dim)
@ -317,9 +352,14 @@ class TestVectorStoreWithIndex:
)
chunks = [
Chunk(content="Test 1", embedding=None, metadata={}),
Chunk(content="Test 2", embedding=[0.2, 0.2, 0.2], metadata={}),
Chunk(content="Test 3", embedding=None, metadata={}),
Chunk(content="Test 1", chunk_id=generate_chunk_id("test-doc", "Test 1"), embedding=None, metadata={}),
Chunk(
content="Test 2",
chunk_id=generate_chunk_id("test-doc", "Test 2"),
embedding=[0.2, 0.2, 0.2],
metadata={},
),
Chunk(content="Test 3", chunk_id=generate_chunk_id("test-doc", "Test 3"), embedding=None, metadata={}),
]
mock_inference_api.openai_embeddings.return_value.data = [