Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 18:00:36 +00:00)

Commit b060f73e6d: Merge remote-tracking branch 'upstream/main' into feat/gunicorn-production-server

70 changed files with 46,290 additions and 1,133 deletions
.github/workflows/pre-commit.yml (vendored, 18 changes)
@@ -43,6 +43,9 @@ jobs:
          cache: 'npm'
          cache-dependency-path: 'src/llama_stack/ui/'

      - name: Set up uv
        uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1

      - name: Install npm dependencies
        run: npm ci
        working-directory: src/llama_stack/ui

@@ -52,7 +55,7 @@ jobs:
        uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
        continue-on-error: true
        env:
          SKIP: no-commit-to-branch
          SKIP: no-commit-to-branch,mypy
          RUFF_OUTPUT_FORMAT: github

      - name: Check pre-commit results

@@ -109,3 +112,16 @@ jobs:
            echo "$unstaged_files"
            exit 1
          fi

      - name: Sync dev + type_checking dependencies
        run: uv sync --group dev --group type_checking

      - name: Run mypy (full type_checking)
        run: |
          set +e
          uv run --group dev --group type_checking mypy
          status=$?
          if [ $status -ne 0 ]; then
            echo "::error::Full mypy failed. Reproduce locally with 'uv run pre-commit run mypy-full --hook-stage manual --all-files'."
          fi
          exit $status
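The new step's error message already points at the local equivalent; a minimal sketch of reproducing the CI check on a developer machine, assuming `uv` is installed and the project defines the `dev` and `type_checking` dependency groups used above:

```bash
# Install the same dependency groups the workflow syncs
uv sync --group dev --group type_checking

# Run the full mypy pass exactly as the CI step does
uv run --group dev --group type_checking mypy

# Or drive it through the manual pre-commit hook the error message references
uv run pre-commit run mypy-full --hook-stage manual --all-files
```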
.gitignore (vendored, 3 changes)

@@ -32,3 +32,6 @@ CLAUDE.md
docs/.docusaurus/
docs/node_modules/
docs/static/imported-files/
docs/docs/api-deprecated/
docs/docs/api-experimental/
docs/docs/api/
.pre-commit-config.yaml

@@ -57,17 +57,27 @@ repos:
    hooks:
      - id: uv-lock

  - repo: local
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.18.2
    hooks:
      - id: mypy
        name: mypy
        additional_dependencies:
          - uv==0.7.8
        entry: uv run --group dev --group type_checking mypy
        language: python
        types: [python]
          - uv==0.6.2
          - pytest
          - rich
          - types-requests
          - pydantic
          - httpx
        pass_filenames: false
        require_serial: true

  - repo: local
    hooks:
      - id: mypy-full
        name: mypy (full type_checking)
        entry: uv run --group dev --group type_checking mypy
        language: system
        pass_filenames: false
        stages: [manual]

  # - repo: https://github.com/tcort/markdown-link-check
  #   rev: v3.11.2

@@ -152,7 +162,6 @@ repos:
        files: ^src/llama_stack/ui/.*\.(ts|tsx)$
        pass_filenames: false
        require_serial: true

      - id: check-log-usage
        name: Ensure 'llama_stack.log' usage for logging
        entry: bash

@@ -171,7 +180,23 @@ repos:
            exit 1
          fi
          exit 0

      - id: fips-compliance
        name: Ensure llama-stack remains FIPS compliant
        entry: bash
        language: system
        types: [python]
        pass_filenames: true
        exclude: '^tests/.*$' # Exclude test dir as some safety tests used MD5
        args:
          - -c
          - |
            grep -EnH '^[^#]*\b(md5|sha1|uuid3|uuid5)\b' "$@" && {
              echo;
              echo "❌ Do not use any of the following functions: hashlib.md5, hashlib.sha1, uuid.uuid3, uuid.uuid5"
              echo "   These functions are not FIPS-compliant"
              echo;
              exit 1;
            } || true

ci:
  autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
  autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate
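The fips-compliance hook is just a grep over the staged Python files; a minimal sketch of the same check run by hand on a single file (the file path is a placeholder):

```bash
# Flag any non-comment use of FIPS-disallowed hash/UUID helpers in one file.
grep -EnH '^[^#]*\b(md5|sha1|uuid3|uuid5)\b' src/llama_stack/example.py && {
  echo
  echo "❌ Do not use hashlib.md5, hashlib.sha1, uuid.uuid3, or uuid.uuid5"
  echo "   These functions are not FIPS-compliant"
  echo
  exit 1
} || true
```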
@@ -61,6 +61,18 @@ uv run pre-commit run --all-files -v

The `-v` (verbose) parameter is optional but often helpful for getting more information about any issues that the pre-commit checks identify.

To run the expanded mypy configuration that CI enforces, use:

```bash
uv run pre-commit run mypy-full --hook-stage manual --all-files
```

or invoke mypy directly with all optional dependencies:

```bash
uv run --group dev --group type_checking mypy
```

```{caution}
Before pushing your changes, make sure that the pre-commit hooks have passed successfully.
```
@@ -1,610 +0,0 @@
# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json

organization:
  # Name of your organization or company, used to determine the name of the client
  # and headings.
  name: llama-stack-client
  docs: https://llama-stack.readthedocs.io/en/latest/
  contact: llamastack@meta.com
security:
  - {}
  - BearerAuth: []
security_schemes:
  BearerAuth:
    type: http
    scheme: bearer
# `targets` define the output targets and their customization options, such as
# whether to emit the Node SDK and what its package name should be.
targets:
  node:
    package_name: llama-stack-client
    production_repo: llamastack/llama-stack-client-typescript
    publish:
      npm: false
  python:
    package_name: llama_stack_client
    production_repo: llamastack/llama-stack-client-python
    options:
      use_uv: true
    publish:
      pypi: true
    project_name: llama_stack_client
  kotlin:
    reverse_domain: com.llama_stack_client.api
    production_repo: null
    publish:
      maven: false
  go:
    package_name: llama-stack-client
    production_repo: llamastack/llama-stack-client-go
    options:
      enable_v2: true
      back_compat_use_shared_package: false

# `client_settings` define settings for the API client, such as extra constructor
# arguments (used for authentication), retry behavior, idempotency, etc.
client_settings:
  default_env_prefix: LLAMA_STACK_CLIENT
  opts:
    api_key:
      type: string
      read_env: LLAMA_STACK_CLIENT_API_KEY
      auth: { security_scheme: BearerAuth }
      nullable: true

# `environments` are a map of the name of the environment (e.g. "sandbox",
# "production") to the corresponding url to use.
environments:
  production: http://any-hosted-llama-stack.com

# `pagination` defines [pagination schemes] which provides a template to match
# endpoints and generate next-page and auto-pagination helpers in the SDKs.
pagination:
  - name: datasets_iterrows
    type: offset
    request:
      dataset_id:
        type: string
      start_index:
        type: integer
        x-stainless-pagination-property:
          purpose: offset_count_param
      limit:
        type: integer
    response:
      data:
        type: array
        items:
          type: object
      next_index:
        type: integer
        x-stainless-pagination-property:
          purpose: offset_count_start_field
  - name: openai_cursor_page
    type: cursor
    request:
      limit:
        type: integer
      after:
        type: string
        x-stainless-pagination-property:
          purpose: next_cursor_param
    response:
      data:
        type: array
        items: {}
      has_more:
        type: boolean
      last_id:
        type: string
        x-stainless-pagination-property:
          purpose: next_cursor_field
# `resources` define the structure and organization for your API, such as how
# methods and models are grouped together and accessed. See the [configuration
# guide] for more information.
#
# [configuration guide]:
# https://app.stainlessapi.com/docs/guides/configure#resources
resources:
  $shared:
    models:
      agent_config: AgentConfig
      interleaved_content_item: InterleavedContentItem
      interleaved_content: InterleavedContent
      param_type: ParamType
      safety_violation: SafetyViolation
      sampling_params: SamplingParams
      scoring_result: ScoringResult
      message: Message
      user_message: UserMessage
      completion_message: CompletionMessage
      tool_response_message: ToolResponseMessage
      system_message: SystemMessage
      tool_call: ToolCall
      query_result: RAGQueryResult
      document: RAGDocument
      query_config: RAGQueryConfig
      response_format: ResponseFormat
  toolgroups:
    models:
      tool_group: ToolGroup
      list_tool_groups_response: ListToolGroupsResponse
    methods:
      register: post /v1/toolgroups
      get: get /v1/toolgroups/{toolgroup_id}
      list: get /v1/toolgroups
      unregister: delete /v1/toolgroups/{toolgroup_id}
  tools:
    methods:
      get: get /v1/tools/{tool_name}
      list:
        endpoint: get /v1/tools
        paginated: false

  tool_runtime:
    models:
      tool_def: ToolDef
      tool_invocation_result: ToolInvocationResult
    methods:
      list_tools:
        endpoint: get /v1/tool-runtime/list-tools
        paginated: false
      invoke_tool: post /v1/tool-runtime/invoke
    subresources:
      rag_tool:
        methods:
          insert: post /v1/tool-runtime/rag-tool/insert
          query: post /v1/tool-runtime/rag-tool/query

  responses:
    models:
      response_object_stream: OpenAIResponseObjectStream
      response_object: OpenAIResponseObject
    methods:
      create:
        type: http
        endpoint: post /v1/responses
        streaming:
          stream_event_model: responses.response_object_stream
          param_discriminator: stream
      retrieve: get /v1/responses/{response_id}
      list:
        type: http
        endpoint: get /v1/responses
      delete:
        type: http
        endpoint: delete /v1/responses/{response_id}
    subresources:
      input_items:
        methods:
          list:
            type: http
            endpoint: get /v1/responses/{response_id}/input_items

  conversations:
    models:
      conversation_object: Conversation
    methods:
      create:
        type: http
        endpoint: post /v1/conversations
      retrieve: get /v1/conversations/{conversation_id}
      update:
        type: http
        endpoint: post /v1/conversations/{conversation_id}
      delete:
        type: http
        endpoint: delete /v1/conversations/{conversation_id}
    subresources:
      items:
        methods:
          get:
            type: http
            endpoint: get /v1/conversations/{conversation_id}/items/{item_id}
          list:
            type: http
            endpoint: get /v1/conversations/{conversation_id}/items
          create:
            type: http
            endpoint: post /v1/conversations/{conversation_id}/items

  inspect:
    models:
      healthInfo: HealthInfo
      providerInfo: ProviderInfo
      routeInfo: RouteInfo
      versionInfo: VersionInfo
    methods:
      health: get /v1/health
      version: get /v1/version

  embeddings:
    models:
      create_embeddings_response: OpenAIEmbeddingsResponse
    methods:
      create: post /v1/embeddings

  chat:
    models:
      chat_completion_chunk: OpenAIChatCompletionChunk
    subresources:
      completions:
        methods:
          create:
            type: http
            endpoint: post /v1/chat/completions
            streaming:
              stream_event_model: chat.chat_completion_chunk
              param_discriminator: stream
          list:
            type: http
            endpoint: get /v1/chat/completions
          retrieve:
            type: http
            endpoint: get /v1/chat/completions/{completion_id}
  completions:
    methods:
      create:
        type: http
        endpoint: post /v1/completions
        streaming:
          param_discriminator: stream

  vector_io:
    models:
      queryChunksResponse: QueryChunksResponse
    methods:
      insert: post /v1/vector-io/insert
      query: post /v1/vector-io/query

  vector_stores:
    models:
      vector_store: VectorStoreObject
      list_vector_stores_response: VectorStoreListResponse
      vector_store_delete_response: VectorStoreDeleteResponse
      vector_store_search_response: VectorStoreSearchResponsePage
    methods:
      create: post /v1/vector_stores
      list:
        endpoint: get /v1/vector_stores
      retrieve: get /v1/vector_stores/{vector_store_id}
      update: post /v1/vector_stores/{vector_store_id}
      delete: delete /v1/vector_stores/{vector_store_id}
      search: post /v1/vector_stores/{vector_store_id}/search
    subresources:
      files:
        models:
          vector_store_file: VectorStoreFileObject
        methods:
          list: get /v1/vector_stores/{vector_store_id}/files
          retrieve: get /v1/vector_stores/{vector_store_id}/files/{file_id}
          update: post /v1/vector_stores/{vector_store_id}/files/{file_id}
          delete: delete /v1/vector_stores/{vector_store_id}/files/{file_id}
          create: post /v1/vector_stores/{vector_store_id}/files
          content: get /v1/vector_stores/{vector_store_id}/files/{file_id}/content
      file_batches:
        models:
          vector_store_file_batches: VectorStoreFileBatchObject
          list_vector_store_files_in_batch_response: VectorStoreFilesListInBatchResponse
        methods:
          create: post /v1/vector_stores/{vector_store_id}/file_batches
          retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}
          list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files
          cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel

  models:
    models:
      model: Model
      list_models_response: ListModelsResponse
    methods:
      retrieve: get /v1/models/{model_id}
      list:
        endpoint: get /v1/models
        paginated: false
      register: post /v1/models
      unregister: delete /v1/models/{model_id}
    subresources:
      openai:
        methods:
          list:
            endpoint: get /v1/models
            paginated: false

  providers:
    models:
      list_providers_response: ListProvidersResponse
    methods:
      list:
        endpoint: get /v1/providers
        paginated: false
      retrieve: get /v1/providers/{provider_id}

  routes:
    models:
      list_routes_response: ListRoutesResponse
    methods:
      list:
        endpoint: get /v1/inspect/routes
        paginated: false


  moderations:
    models:
      create_response: ModerationObject
    methods:
      create: post /v1/moderations


  safety:
    models:
      run_shield_response: RunShieldResponse
    methods:
      run_shield: post /v1/safety/run-shield


  shields:
    models:
      shield: Shield
      list_shields_response: ListShieldsResponse
    methods:
      retrieve: get /v1/shields/{identifier}
      list:
        endpoint: get /v1/shields
        paginated: false
      register: post /v1/shields
      delete: delete /v1/shields/{identifier}

  synthetic_data_generation:
    models:
      syntheticDataGenerationResponse: SyntheticDataGenerationResponse
    methods:
      generate: post /v1/synthetic-data-generation/generate

  telemetry:
    models:
      span_with_status: SpanWithStatus
      trace: Trace
      query_spans_response: QuerySpansResponse
      event: Event
      query_condition: QueryCondition
    methods:
      query_traces:
        endpoint: post /v1alpha/telemetry/traces
        skip_test_reason: 'unsupported query params in java / kotlin'
      get_span_tree: post /v1alpha/telemetry/spans/{span_id}/tree
      query_spans:
        endpoint: post /v1alpha/telemetry/spans
        skip_test_reason: 'unsupported query params in java / kotlin'
      query_metrics:
        endpoint: post /v1alpha/telemetry/metrics/{metric_name}
        skip_test_reason: 'unsupported query params in java / kotlin'
      # log_event: post /v1alpha/telemetry/events
      save_spans_to_dataset: post /v1alpha/telemetry/spans/export
      get_span: get /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}
      get_trace: get /v1alpha/telemetry/traces/{trace_id}

  scoring:
    methods:
      score: post /v1/scoring/score
      score_batch: post /v1/scoring/score-batch
  scoring_functions:
    methods:
      retrieve: get /v1/scoring-functions/{scoring_fn_id}
      list:
        endpoint: get /v1/scoring-functions
        paginated: false
      register: post /v1/scoring-functions
    models:
      scoring_fn: ScoringFn
      scoring_fn_params: ScoringFnParams
      list_scoring_functions_response: ListScoringFunctionsResponse

  benchmarks:
    methods:
      retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
      list:
        endpoint: get /v1alpha/eval/benchmarks
        paginated: false
      register: post /v1alpha/eval/benchmarks
    models:
      benchmark: Benchmark
      list_benchmarks_response: ListBenchmarksResponse

  files:
    methods:
      create: post /v1/files
      list: get /v1/files
      retrieve: get /v1/files/{file_id}
      delete: delete /v1/files/{file_id}
      content: get /v1/files/{file_id}/content
    models:
      file: OpenAIFileObject
      list_files_response: ListOpenAIFileResponse
      delete_file_response: OpenAIFileDeleteResponse

  alpha:
    subresources:
      inference:
        methods:
          rerank: post /v1alpha/inference/rerank

      post_training:
        models:
          algorithm_config: AlgorithmConfig
          post_training_job: PostTrainingJob
          list_post_training_jobs_response: ListPostTrainingJobsResponse
        methods:
          preference_optimize: post /v1alpha/post-training/preference-optimize
          supervised_fine_tune: post /v1alpha/post-training/supervised-fine-tune
        subresources:
          job:
            methods:
              artifacts: get /v1alpha/post-training/job/artifacts
              cancel: post /v1alpha/post-training/job/cancel
              status: get /v1alpha/post-training/job/status
              list:
                endpoint: get /v1alpha/post-training/jobs
                paginated: false

      eval:
        methods:
          evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
          run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
          evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
          run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs

        subresources:
          jobs:
            methods:
              cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
              status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
              retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
        models:
          evaluate_response: EvaluateResponse
          benchmark_config: BenchmarkConfig
          job: Job

      agents:
        methods:
          create: post /v1alpha/agents
          list: get /v1alpha/agents
          retrieve: get /v1alpha/agents/{agent_id}
          delete: delete /v1alpha/agents/{agent_id}
        models:
          inference_step: InferenceStep
          tool_execution_step: ToolExecutionStep
          tool_response: ToolResponse
          shield_call_step: ShieldCallStep
          memory_retrieval_step: MemoryRetrievalStep
        subresources:
          session:
            models:
              session: Session
            methods:
              list: get /v1alpha/agents/{agent_id}/sessions
              create: post /v1alpha/agents/{agent_id}/session
              delete: delete /v1alpha/agents/{agent_id}/session/{session_id}
              retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}
          steps:
            methods:
              retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}
          turn:
            models:
              turn: Turn
              turn_response_event: AgentTurnResponseEvent
              agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk
            methods:
              create:
                type: http
                endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn
                streaming:
                  stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
                  param_discriminator: stream
              retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}
              resume:
                type: http
                endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume
                streaming:
                  stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
                  param_discriminator: stream

  beta:
    subresources:
      datasets:
        models:
          list_datasets_response: ListDatasetsResponse
        methods:
          register: post /v1beta/datasets
          retrieve: get /v1beta/datasets/{dataset_id}
          list:
            endpoint: get /v1beta/datasets
            paginated: false
          unregister: delete /v1beta/datasets/{dataset_id}
          iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
          appendrows: post /v1beta/datasetio/append-rows/{dataset_id}


settings:
  license: MIT
  unwrap_response_fields: [ data ]

openapi:
  transformations:
    - command: renameValue
      reason: pydantic reserved name
      args:
        filter:
          only:
            - '$.components.schemas.InferenceStep.properties.model_response'
        rename:
          python:
            property_name: 'inference_model_response'

    # - command: renameValue
    #   reason: pydantic reserved name
    #   args:
    #     filter:
    #       only:
    #         - '$.components.schemas.Model.properties.model_type'
    #     rename:
    #       python:
    #         property_name: 'type'
    - command: mergeObject
      reason: Better return_type using enum
      args:
        target:
          - '$.components.schemas'
        object:
          ReturnType:
            additionalProperties: false
            properties:
              type:
                enum:
                  - string
                  - number
                  - boolean
                  - array
                  - object
                  - json
                  - union
                  - chat_completion_input
                  - completion_input
                  - agent_turn_input
            required:
              - type
            type: object
    - command: replaceProperties
      reason: Replace return type properties with better model (see above)
      args:
        filter:
          only:
            - '$.components.schemas.ScoringFn.properties.return_type'
            - '$.components.schemas.RegisterScoringFunctionRequest.properties.return_type'
        value:
          $ref: '#/components/schemas/ReturnType'
    - command: oneOfToAnyOf
      reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants
    - reason: For better names
      command: extractToRefs
      args:
        ref:
          target: '$.components.schemas.ToolCallDelta.properties.tool_call'
          name: '#/components/schemas/ToolCallOrString'

# `readme` is used to configure the code snippets that will be rendered in the
# README.md of various SDKs. In particular, you can change the `headline`
# snippet's endpoint and the arguments to call it with.
readme:
  example_requests:
    default:
      type: request
      endpoint: post /v1/chat/completions
      params: &ref_0 {}
    headline:
      type: request
      endpoint: post /v1/models
      params: *ref_0
    pagination:
      type: request
      endpoint: post /v1/chat/completions
      params: {}
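The `datasets_iterrows` offset scheme above is what drove auto-pagination for the beta `iterrows` method; a minimal curl sketch against the config's example server (the dataset ID is a placeholder):

```bash
# First page: offset pagination via start_index/limit.
curl -s "http://any-hosted-llama-stack.com/v1beta/datasetio/iterrows/my-dataset?start_index=0&limit=100"

# The response's next_index is the offset to pass as start_index for the
# following page (the generated SDKs read it via offset_count_start_field).
curl -s "http://any-hosted-llama-stack.com/v1beta/datasetio/iterrows/my-dataset?start_index=100&limit=100"
```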
docs/static/llama-stack-spec.yaml

@@ -15,6 +15,141 @@ info:
servers:
  - url: http://any-hosted-llama-stack.com
paths:
  /v1/batches:
    get:
      responses:
        '200':
          description: A list of batch objects.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListBatchesResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: List all batches for the current user.
      description: List all batches for the current user.
      parameters:
        - name: after
          in: query
          description: >-
            A cursor for pagination; returns batches after this batch ID.
          required: false
          schema:
            type: string
        - name: limit
          in: query
          description: >-
            Number of batches to return (default 20, max 100).
          required: true
          schema:
            type: integer
      deprecated: false
    post:
      responses:
        '200':
          description: The created batch object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: >-
        Create a new batch for processing multiple API requests.
      description: >-
        Create a new batch for processing multiple API requests.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateBatchRequest'
        required: true
      deprecated: false
  /v1/batches/{batch_id}:
    get:
      responses:
        '200':
          description: The batch object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: >-
        Retrieve information about a specific batch.
      description: >-
        Retrieve information about a specific batch.
      parameters:
        - name: batch_id
          in: path
          description: The ID of the batch to retrieve.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/batches/{batch_id}/cancel:
    post:
      responses:
        '200':
          description: The updated batch object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: Cancel a batch that is in progress.
      description: Cancel a batch that is in progress.
      parameters:
        - name: batch_id
          in: path
          description: The ID of the batch to cancel.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/chat/completions:
    get:
      responses:
@@ -4212,6 +4347,331 @@ components:
      title: Error
      description: >-
        Error response from the API. Roughly follows RFC 7807.
    ListBatchesResponse:
      type: object
      properties:
        object:
          type: string
          const: list
          default: list
        data:
          type: array
          items:
            type: object
            properties:
              id:
                type: string
              completion_window:
                type: string
              created_at:
                type: integer
              endpoint:
                type: string
              input_file_id:
                type: string
              object:
                type: string
                const: batch
              status:
                type: string
                enum:
                  - validating
                  - failed
                  - in_progress
                  - finalizing
                  - completed
                  - expired
                  - cancelling
                  - cancelled
              cancelled_at:
                type: integer
              cancelling_at:
                type: integer
              completed_at:
                type: integer
              error_file_id:
                type: string
              errors:
                type: object
                properties:
                  data:
                    type: array
                    items:
                      type: object
                      properties:
                        code:
                          type: string
                        line:
                          type: integer
                        message:
                          type: string
                        param:
                          type: string
                      additionalProperties: false
                      title: BatchError
                  object:
                    type: string
                additionalProperties: false
                title: Errors
              expired_at:
                type: integer
              expires_at:
                type: integer
              failed_at:
                type: integer
              finalizing_at:
                type: integer
              in_progress_at:
                type: integer
              metadata:
                type: object
                additionalProperties:
                  type: string
              model:
                type: string
              output_file_id:
                type: string
              request_counts:
                type: object
                properties:
                  completed:
                    type: integer
                  failed:
                    type: integer
                  total:
                    type: integer
                additionalProperties: false
                required:
                  - completed
                  - failed
                  - total
                title: BatchRequestCounts
              usage:
                type: object
                properties:
                  input_tokens:
                    type: integer
                  input_tokens_details:
                    type: object
                    properties:
                      cached_tokens:
                        type: integer
                    additionalProperties: false
                    required:
                      - cached_tokens
                    title: InputTokensDetails
                  output_tokens:
                    type: integer
                  output_tokens_details:
                    type: object
                    properties:
                      reasoning_tokens:
                        type: integer
                    additionalProperties: false
                    required:
                      - reasoning_tokens
                    title: OutputTokensDetails
                  total_tokens:
                    type: integer
                additionalProperties: false
                required:
                  - input_tokens
                  - input_tokens_details
                  - output_tokens
                  - output_tokens_details
                  - total_tokens
                title: BatchUsage
            additionalProperties: false
            required:
              - id
              - completion_window
              - created_at
              - endpoint
              - input_file_id
              - object
              - status
            title: Batch
        first_id:
          type: string
        last_id:
          type: string
        has_more:
          type: boolean
          default: false
      additionalProperties: false
      required:
        - object
        - data
        - has_more
      title: ListBatchesResponse
      description: >-
        Response containing a list of batch objects.
    CreateBatchRequest:
      type: object
      properties:
        input_file_id:
          type: string
          description: >-
            The ID of an uploaded file containing requests for the batch.
        endpoint:
          type: string
          description: >-
            The endpoint to be used for all requests in the batch.
        completion_window:
          type: string
          const: 24h
          description: >-
            The time window within which the batch should be processed.
        metadata:
          type: object
          additionalProperties:
            type: string
          description: Optional metadata for the batch.
        idempotency_key:
          type: string
          description: >-
            Optional idempotency key. When provided, enables idempotent behavior.
      additionalProperties: false
      required:
        - input_file_id
        - endpoint
        - completion_window
      title: CreateBatchRequest
    Batch:
      type: object
      properties:
        id:
          type: string
        completion_window:
          type: string
        created_at:
          type: integer
        endpoint:
          type: string
        input_file_id:
          type: string
        object:
          type: string
          const: batch
        status:
          type: string
          enum:
            - validating
            - failed
            - in_progress
            - finalizing
            - completed
            - expired
            - cancelling
            - cancelled
        cancelled_at:
          type: integer
        cancelling_at:
          type: integer
        completed_at:
          type: integer
        error_file_id:
          type: string
        errors:
          type: object
          properties:
            data:
              type: array
              items:
                type: object
                properties:
                  code:
                    type: string
                  line:
                    type: integer
                  message:
                    type: string
                  param:
                    type: string
                additionalProperties: false
                title: BatchError
            object:
              type: string
          additionalProperties: false
          title: Errors
        expired_at:
          type: integer
        expires_at:
          type: integer
        failed_at:
          type: integer
        finalizing_at:
          type: integer
        in_progress_at:
          type: integer
        metadata:
          type: object
          additionalProperties:
            type: string
        model:
          type: string
        output_file_id:
          type: string
        request_counts:
          type: object
          properties:
            completed:
              type: integer
            failed:
              type: integer
            total:
              type: integer
          additionalProperties: false
          required:
            - completed
            - failed
            - total
          title: BatchRequestCounts
        usage:
          type: object
          properties:
            input_tokens:
              type: integer
            input_tokens_details:
              type: object
              properties:
                cached_tokens:
                  type: integer
              additionalProperties: false
              required:
                - cached_tokens
              title: InputTokensDetails
            output_tokens:
              type: integer
            output_tokens_details:
              type: object
              properties:
                reasoning_tokens:
                  type: integer
              additionalProperties: false
              required:
                - reasoning_tokens
              title: OutputTokensDetails
            total_tokens:
              type: integer
          additionalProperties: false
          required:
            - input_tokens
            - input_tokens_details
            - output_tokens
            - output_tokens_details
            - total_tokens
          title: BatchUsage
      additionalProperties: false
      required:
        - id
        - completion_window
        - created_at
        - endpoint
        - input_file_id
        - object
        - status
      title: Batch
    Order:
      type: string
      enum:
@@ -10258,6 +10718,10 @@ components:
          description: >-
            The content of the chunk, which can be interleaved text, images, or other
            types.
        chunk_id:
          type: string
          description: >-
            Unique identifier for the chunk. Must be provided explicitly.
        metadata:
          type: object
          additionalProperties:

@@ -10278,10 +10742,6 @@ components:
          description: >-
            Optional embedding for the chunk. If not provided, it will be computed
            later.
        stored_chunk_id:
          type: string
          description: >-
            The chunk ID that is stored in the vector database. Used for backend functionality.
        chunk_metadata:
          $ref: '#/components/schemas/ChunkMetadata'
          description: >-

@@ -10290,6 +10750,7 @@ components:
      additionalProperties: false
      required:
        - content
        - chunk_id
        - metadata
      title: Chunk
      description: >-

@@ -13527,6 +13988,19 @@ tags:
    description: >-
      APIs for creating and interacting with agentic systems.
    x-displayName: Agents
  - name: Batches
    description: >-
      The API is designed to allow use of openai client libraries for seamless integration.


      This API provides the following extensions:
       - idempotent batch creation

      Note: This API is currently under active development and may undergo changes.
    x-displayName: >-
      The Batches API enables efficient processing of multiple requests in a single
      operation, particularly useful for processing large datasets, batch evaluation
      workflows, and cost-effective inference at scale.
  - name: Benchmarks
    description: ''
  - name: Conversations

@@ -13601,6 +14075,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
      - Batches
      - Benchmarks
      - Conversations
      - DatasetIO
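Since the Batches tag description says the API is designed to work with OpenAI client libraries, plain HTTP also suffices; a minimal curl sketch against the spec's example server (the file and batch IDs are placeholders):

```bash
BASE=http://any-hosted-llama-stack.com

# Create a batch from an uploaded requests file; '24h' is the only
# completion_window the schema allows, and idempotency_key is the
# extension called out in the Batches tag description.
curl -s -X POST "$BASE/v1/batches" \
  -H 'Content-Type: application/json' \
  -d '{
        "input_file_id": "file-abc123",
        "endpoint": "/v1/chat/completions",
        "completion_window": "24h",
        "idempotency_key": "nightly-eval-2025-12-03"
      }'

# List batches with the cursor-style after/limit query parameters.
curl -s "$BASE/v1/batches?limit=20&after=batch_abc123"

# Retrieve or cancel a specific batch.
curl -s "$BASE/v1/batches/batch_abc123"
curl -s -X POST "$BASE/v1/batches/batch_abc123/cancel"
```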
docs/notebooks/llamastack_agents_getting_started_examples.ipynb (new file, 1036 lines)

File diff suppressed because it is too large.
@@ -242,15 +242,6 @@ const sidebars: SidebarsConfig = {
            'providers/eval/remote_nvidia'
          ],
        },
        {
          type: 'category',
          label: 'Telemetry',
          collapsed: true,
          items: [
            'providers/telemetry/index',
            'providers/telemetry/inline_meta-reference'
          ],
        },
        {
          type: 'category',
          label: 'Batches',
docs/static/deprecated-llama-stack-spec.html (vendored, 638 changes)
@@ -1414,6 +1414,193 @@
[Adds the same three Batches operations as the main spec above, in JSON form under the legacy /v1/openai/v1/batches, /v1/openai/v1/batches/{batch_id}, and /v1/openai/v1/batches/{batch_id}/cancel paths, each marked "deprecated": true.]
@@ -6401,6 +6588,451 @@
[Adds the ListBatchesResponse, CreateBatchRequest, and Batch component schemas in JSON form, verbatim duplicates of the schemas added to the main spec above, followed by the existing Order enum.]
@@ -13505,6 +14137,11 @@
      "description": "APIs for creating and interacting with agentic systems.\n\n## Deprecated APIs\n\n> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.\n\n### Migration Guidance\n\nIf you are using deprecated versions of the Agents or Responses APIs, please migrate to:\n\n- **Responses API**: Use the stable v1 Responses API endpoints\n",
      "x-displayName": "Agents"
    },
    {
      "name": "Batches",
      "description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
      "x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
    },
    {
      "name": "Benchmarks",
      "description": ""
@@ -13555,6 +14192,7 @@
      "name": "Operations",
      "tags": [
        "Agents",
        "Batches",
        "Benchmarks",
        "DatasetIO",
        "Datasets",
docs/static/deprecated-llama-stack-spec.yaml (vendored, 474 changes)
@ -1012,6 +1012,141 @@ paths:
|
|||
schema:
|
||||
type: string
|
||||
deprecated: true
|
||||
/v1/openai/v1/batches:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A list of batch objects.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListBatchesResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Batches
|
||||
summary: List all batches for the current user.
|
||||
description: List all batches for the current user.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
description: >-
|
||||
A cursor for pagination; returns batches after this batch ID.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: limit
|
||||
in: query
|
||||
description: >-
|
||||
Number of batches to return (default 20, max 100).
|
||||
required: true
|
||||
schema:
|
||||
type: integer
|
||||
deprecated: true
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: The created batch object.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Batch'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Batches
|
||||
summary: >-
|
||||
Create a new batch for processing multiple API requests.
|
||||
description: >-
|
||||
Create a new batch for processing multiple API requests.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreateBatchRequest'
|
||||
required: true
|
||||
deprecated: true
|
||||
/v1/openai/v1/batches/{batch_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: The batch object.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Batch'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Batches
|
||||
summary: >-
|
||||
Retrieve information about a specific batch.
|
||||
description: >-
|
||||
Retrieve information about a specific batch.
|
||||
parameters:
|
||||
- name: batch_id
|
||||
in: path
|
||||
description: The ID of the batch to retrieve.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
deprecated: true
|
||||
/v1/openai/v1/batches/{batch_id}/cancel:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: The updated batch object.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Batch'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Batches
|
||||
summary: Cancel a batch that is in progress.
|
||||
description: Cancel a batch that is in progress.
|
||||
parameters:
|
||||
- name: batch_id
|
||||
in: path
|
||||
description: The ID of the batch to cancel.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
deprecated: true
|
||||
/v1/openai/v1/chat/completions:
|
||||
get:
|
||||
responses:
|
||||
|
|
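A reader's note on the hunk above: the legacy `/v1/openai/v1/batches` routes remain callable while marked deprecated. A minimal sketch of driving them with the `openai` Python client — the base URL, port, and `api_key="none"` are assumptions for illustration, not part of this diff:

```python
from openai import OpenAI

# The legacy prefix is passed as the base URL; new code should target /v1 instead.
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

# GET /v1/openai/v1/batches — `limit` is the documented query parameter above.
page = client.batches.list(limit=20)
for batch in page.data:
    print(batch.id, batch.status)

# POST /v1/openai/v1/batches/{batch_id}/cancel for anything still in progress.
for batch in page.data:
    if batch.status == "in_progress":
        client.batches.cancel(batch.id)
```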
@ -4736,6 +4871,331 @@ components:
      title: Job
      description: >-
        A job execution instance with status tracking.
    ListBatchesResponse:
      type: object
      properties:
        object:
          type: string
          const: list
          default: list
        data:
          type: array
          items:
            type: object
            properties:
              id:
                type: string
              completion_window:
                type: string
              created_at:
                type: integer
              endpoint:
                type: string
              input_file_id:
                type: string
              object:
                type: string
                const: batch
              status:
                type: string
                enum:
                  - validating
                  - failed
                  - in_progress
                  - finalizing
                  - completed
                  - expired
                  - cancelling
                  - cancelled
              cancelled_at:
                type: integer
              cancelling_at:
                type: integer
              completed_at:
                type: integer
              error_file_id:
                type: string
              errors:
                type: object
                properties:
                  data:
                    type: array
                    items:
                      type: object
                      properties:
                        code:
                          type: string
                        line:
                          type: integer
                        message:
                          type: string
                        param:
                          type: string
                      additionalProperties: false
                      title: BatchError
                  object:
                    type: string
                additionalProperties: false
                title: Errors
              expired_at:
                type: integer
              expires_at:
                type: integer
              failed_at:
                type: integer
              finalizing_at:
                type: integer
              in_progress_at:
                type: integer
              metadata:
                type: object
                additionalProperties:
                  type: string
              model:
                type: string
              output_file_id:
                type: string
              request_counts:
                type: object
                properties:
                  completed:
                    type: integer
                  failed:
                    type: integer
                  total:
                    type: integer
                additionalProperties: false
                required:
                  - completed
                  - failed
                  - total
                title: BatchRequestCounts
              usage:
                type: object
                properties:
                  input_tokens:
                    type: integer
                  input_tokens_details:
                    type: object
                    properties:
                      cached_tokens:
                        type: integer
                    additionalProperties: false
                    required:
                      - cached_tokens
                    title: InputTokensDetails
                  output_tokens:
                    type: integer
                  output_tokens_details:
                    type: object
                    properties:
                      reasoning_tokens:
                        type: integer
                    additionalProperties: false
                    required:
                      - reasoning_tokens
                    title: OutputTokensDetails
                  total_tokens:
                    type: integer
                additionalProperties: false
                required:
                  - input_tokens
                  - input_tokens_details
                  - output_tokens
                  - output_tokens_details
                  - total_tokens
                title: BatchUsage
            additionalProperties: false
            required:
              - id
              - completion_window
              - created_at
              - endpoint
              - input_file_id
              - object
              - status
            title: Batch
        first_id:
          type: string
        last_id:
          type: string
        has_more:
          type: boolean
          default: false
      additionalProperties: false
      required:
        - object
        - data
        - has_more
      title: ListBatchesResponse
      description: >-
        Response containing a list of batch objects.
    CreateBatchRequest:
      type: object
      properties:
        input_file_id:
          type: string
          description: >-
            The ID of an uploaded file containing requests for the batch.
        endpoint:
          type: string
          description: >-
            The endpoint to be used for all requests in the batch.
        completion_window:
          type: string
          const: 24h
          description: >-
            The time window within which the batch should be processed.
        metadata:
          type: object
          additionalProperties:
            type: string
          description: Optional metadata for the batch.
        idempotency_key:
          type: string
          description: >-
            Optional idempotency key. When provided, enables idempotent behavior.
      additionalProperties: false
      required:
        - input_file_id
        - endpoint
        - completion_window
      title: CreateBatchRequest
    Batch:
      type: object
      properties:
        id:
          type: string
        completion_window:
          type: string
        created_at:
          type: integer
        endpoint:
          type: string
        input_file_id:
          type: string
        object:
          type: string
          const: batch
        status:
          type: string
          enum:
            - validating
            - failed
            - in_progress
            - finalizing
            - completed
            - expired
            - cancelling
            - cancelled
        cancelled_at:
          type: integer
        cancelling_at:
          type: integer
        completed_at:
          type: integer
        error_file_id:
          type: string
        errors:
          type: object
          properties:
            data:
              type: array
              items:
                type: object
                properties:
                  code:
                    type: string
                  line:
                    type: integer
                  message:
                    type: string
                  param:
                    type: string
                additionalProperties: false
                title: BatchError
            object:
              type: string
          additionalProperties: false
          title: Errors
        expired_at:
          type: integer
        expires_at:
          type: integer
        failed_at:
          type: integer
        finalizing_at:
          type: integer
        in_progress_at:
          type: integer
        metadata:
          type: object
          additionalProperties:
            type: string
        model:
          type: string
        output_file_id:
          type: string
        request_counts:
          type: object
          properties:
            completed:
              type: integer
            failed:
              type: integer
            total:
              type: integer
          additionalProperties: false
          required:
            - completed
            - failed
            - total
          title: BatchRequestCounts
        usage:
          type: object
          properties:
            input_tokens:
              type: integer
            input_tokens_details:
              type: object
              properties:
                cached_tokens:
                  type: integer
              additionalProperties: false
              required:
                - cached_tokens
              title: InputTokensDetails
            output_tokens:
              type: integer
            output_tokens_details:
              type: object
              properties:
                reasoning_tokens:
                  type: integer
              additionalProperties: false
              required:
                - reasoning_tokens
              title: OutputTokensDetails
            total_tokens:
              type: integer
          additionalProperties: false
          required:
            - input_tokens
            - input_tokens_details
            - output_tokens
            - output_tokens_details
            - total_tokens
          title: BatchUsage
      additionalProperties: false
      required:
        - id
        - completion_window
        - created_at
        - endpoint
        - input_file_id
        - object
        - status
      title: Batch
    Order:
      type: string
      enum:

@ -10263,6 +10723,19 @@ tags:
      - **Responses API**: Use the stable v1 Responses API endpoints
    x-displayName: Agents
  - name: Batches
    description: >-
      The API is designed to allow use of openai client libraries for seamless integration.


      This API provides the following extensions:
       - idempotent batch creation

      Note: This API is currently under active development and may undergo changes.
    x-displayName: >-
      The Batches API enables efficient processing of multiple requests in a single
      operation, particularly useful for processing large datasets, batch evaluation
      workflows, and cost-effective inference at scale.
  - name: Benchmarks
    description: ''
  - name: DatasetIO

@ -10308,6 +10781,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
      - Batches
      - Benchmarks
      - DatasetIO
      - Datasets
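The Batches tag description above says the API "is designed to allow use of openai client libraries". A sketch of the full flow implied by `CreateBatchRequest` — build a JSONL input file, upload it, create the batch. The server URL, model ID, and file contents are placeholders; the per-line request shape follows the OpenAI batch-file convention, which this openai-compatible API presumably mirrors:

```python
import json
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# One request per JSONL line; each line's "url" should match the batch's `endpoint`.
with open("requests.jsonl", "w") as f:
    for i, question in enumerate(["What is a llama?", "What is a batch API?"]):
        f.write(json.dumps({
            "custom_id": f"req-{i}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "llama3.2:3b",  # placeholder model ID
                "messages": [{"role": "user", "content": question}],
            },
        }) + "\n")

# Upload the file, then create the batch with the three required fields from
# CreateBatchRequest; `completion_window` is pinned to the constant "24h".
batch_input = client.files.create(file=open("requests.jsonl", "rb"), purpose="batch")
batch = client.batches.create(
    input_file_id=batch_input.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={"suite": "smoke"},  # optional string-to-string map
)
print(batch.id, batch.status)  # a new batch starts in "validating"
```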
647
docs/static/llama-stack-spec.html
vendored
647
docs/static/llama-stack-spec.html
vendored

@ -40,6 +40,193 @@
      }
    ],
    "paths": {
      "/v1/batches": {
        "get": {
          "responses": {
            "200": {
              "description": "A list of batch objects.",
              "content": {
                "application/json": {
                  "schema": {
                    "$ref": "#/components/schemas/ListBatchesResponse"
                  }
                }
              }
            },
            "400": {
              "$ref": "#/components/responses/BadRequest400"
            },
            "429": {
              "$ref": "#/components/responses/TooManyRequests429"
            },
            "500": {
              "$ref": "#/components/responses/InternalServerError500"
            },
            "default": {
              "$ref": "#/components/responses/DefaultError"
            }
          },
          "tags": [
            "Batches"
          ],
          "summary": "List all batches for the current user.",
          "description": "List all batches for the current user.",
          "parameters": [
            {
              "name": "after",
              "in": "query",
              "description": "A cursor for pagination; returns batches after this batch ID.",
              "required": false,
              "schema": {
                "type": "string"
              }
            },
            {
              "name": "limit",
              "in": "query",
              "description": "Number of batches to return (default 20, max 100).",
              "required": true,
              "schema": {
                "type": "integer"
              }
            }
          ],
          "deprecated": false
        },
        "post": {
          "responses": {
            "200": {
              "description": "The created batch object.",
              "content": {
                "application/json": {
                  "schema": {
                    "$ref": "#/components/schemas/Batch"
                  }
                }
              }
            },
            "400": {
              "$ref": "#/components/responses/BadRequest400"
            },
            "429": {
              "$ref": "#/components/responses/TooManyRequests429"
            },
            "500": {
              "$ref": "#/components/responses/InternalServerError500"
            },
            "default": {
              "$ref": "#/components/responses/DefaultError"
            }
          },
          "tags": [
            "Batches"
          ],
          "summary": "Create a new batch for processing multiple API requests.",
          "description": "Create a new batch for processing multiple API requests.",
          "parameters": [],
          "requestBody": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CreateBatchRequest"
                }
              }
            },
            "required": true
          },
          "deprecated": false
        }
      },
      "/v1/batches/{batch_id}": {
        "get": {
          "responses": {
            "200": {
              "description": "The batch object.",
              "content": {
                "application/json": {
                  "schema": {
                    "$ref": "#/components/schemas/Batch"
                  }
                }
              }
            },
            "400": {
              "$ref": "#/components/responses/BadRequest400"
            },
            "429": {
              "$ref": "#/components/responses/TooManyRequests429"
            },
            "500": {
              "$ref": "#/components/responses/InternalServerError500"
            },
            "default": {
              "$ref": "#/components/responses/DefaultError"
            }
          },
          "tags": [
            "Batches"
          ],
          "summary": "Retrieve information about a specific batch.",
          "description": "Retrieve information about a specific batch.",
          "parameters": [
            {
              "name": "batch_id",
              "in": "path",
              "description": "The ID of the batch to retrieve.",
              "required": true,
              "schema": {
                "type": "string"
              }
            }
          ],
          "deprecated": false
        }
      },
      "/v1/batches/{batch_id}/cancel": {
        "post": {
          "responses": {
            "200": {
              "description": "The updated batch object.",
              "content": {
                "application/json": {
                  "schema": {
                    "$ref": "#/components/schemas/Batch"
                  }
                }
              }
            },
            "400": {
              "$ref": "#/components/responses/BadRequest400"
            },
            "429": {
              "$ref": "#/components/responses/TooManyRequests429"
            },
            "500": {
              "$ref": "#/components/responses/InternalServerError500"
            },
            "default": {
              "$ref": "#/components/responses/DefaultError"
            }
          },
          "tags": [
            "Batches"
          ],
          "summary": "Cancel a batch that is in progress.",
          "description": "Cancel a batch that is in progress.",
          "parameters": [
            {
              "name": "batch_id",
              "in": "path",
              "description": "The ID of the batch to cancel.",
              "required": true,
              "schema": {
                "type": "string"
              }
            }
          ],
          "deprecated": false
        }
      },
      "/v1/chat/completions": {
        "get": {
          "responses": {
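The `status` enum in the schemas below (validating, in_progress, finalizing, then completed, failed, expired, or cancelled) suggests a simple polling loop against GET /v1/batches/{batch_id}. A sketch, with the server URL, key handling, and poll interval as illustrative assumptions:

```python
import time
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

TERMINAL_STATES = {"completed", "failed", "expired", "cancelled"}

def wait_for_batch(batch_id: str, interval_s: float = 10.0):
    """Poll GET /v1/batches/{batch_id} until the status enum hits a terminal value."""
    while True:
        batch = client.batches.retrieve(batch_id)
        if batch.status in TERMINAL_STATES:
            return batch
        time.sleep(interval_s)
```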
@ -4005,6 +4192,451 @@
        "title": "Error",
        "description": "Error response from the API. Roughly follows RFC 7807."
      },
      "ListBatchesResponse": {
        "type": "object",
        "properties": {
          "object": {
            "type": "string",
            "const": "list",
            "default": "list"
          },
          "data": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "id": {
                  "type": "string"
                },
                "completion_window": {
                  "type": "string"
                },
                "created_at": {
                  "type": "integer"
                },
                "endpoint": {
                  "type": "string"
                },
                "input_file_id": {
                  "type": "string"
                },
                "object": {
                  "type": "string",
                  "const": "batch"
                },
                "status": {
                  "type": "string",
                  "enum": [
                    "validating",
                    "failed",
                    "in_progress",
                    "finalizing",
                    "completed",
                    "expired",
                    "cancelling",
                    "cancelled"
                  ]
                },
                "cancelled_at": {
                  "type": "integer"
                },
                "cancelling_at": {
                  "type": "integer"
                },
                "completed_at": {
                  "type": "integer"
                },
                "error_file_id": {
                  "type": "string"
                },
                "errors": {
                  "type": "object",
                  "properties": {
                    "data": {
                      "type": "array",
                      "items": {
                        "type": "object",
                        "properties": {
                          "code": {
                            "type": "string"
                          },
                          "line": {
                            "type": "integer"
                          },
                          "message": {
                            "type": "string"
                          },
                          "param": {
                            "type": "string"
                          }
                        },
                        "additionalProperties": false,
                        "title": "BatchError"
                      }
                    },
                    "object": {
                      "type": "string"
                    }
                  },
                  "additionalProperties": false,
                  "title": "Errors"
                },
                "expired_at": {
                  "type": "integer"
                },
                "expires_at": {
                  "type": "integer"
                },
                "failed_at": {
                  "type": "integer"
                },
                "finalizing_at": {
                  "type": "integer"
                },
                "in_progress_at": {
                  "type": "integer"
                },
                "metadata": {
                  "type": "object",
                  "additionalProperties": {
                    "type": "string"
                  }
                },
                "model": {
                  "type": "string"
                },
                "output_file_id": {
                  "type": "string"
                },
                "request_counts": {
                  "type": "object",
                  "properties": {
                    "completed": {
                      "type": "integer"
                    },
                    "failed": {
                      "type": "integer"
                    },
                    "total": {
                      "type": "integer"
                    }
                  },
                  "additionalProperties": false,
                  "required": [
                    "completed",
                    "failed",
                    "total"
                  ],
                  "title": "BatchRequestCounts"
                },
                "usage": {
                  "type": "object",
                  "properties": {
                    "input_tokens": {
                      "type": "integer"
                    },
                    "input_tokens_details": {
                      "type": "object",
                      "properties": {
                        "cached_tokens": {
                          "type": "integer"
                        }
                      },
                      "additionalProperties": false,
                      "required": [
                        "cached_tokens"
                      ],
                      "title": "InputTokensDetails"
                    },
                    "output_tokens": {
                      "type": "integer"
                    },
                    "output_tokens_details": {
                      "type": "object",
                      "properties": {
                        "reasoning_tokens": {
                          "type": "integer"
                        }
                      },
                      "additionalProperties": false,
                      "required": [
                        "reasoning_tokens"
                      ],
                      "title": "OutputTokensDetails"
                    },
                    "total_tokens": {
                      "type": "integer"
                    }
                  },
                  "additionalProperties": false,
                  "required": [
                    "input_tokens",
                    "input_tokens_details",
                    "output_tokens",
                    "output_tokens_details",
                    "total_tokens"
                  ],
                  "title": "BatchUsage"
                }
              },
              "additionalProperties": false,
              "required": [
                "id",
                "completion_window",
                "created_at",
                "endpoint",
                "input_file_id",
                "object",
                "status"
              ],
              "title": "Batch"
            }
          },
          "first_id": {
            "type": "string"
          },
          "last_id": {
            "type": "string"
          },
          "has_more": {
            "type": "boolean",
            "default": false
          }
        },
        "additionalProperties": false,
        "required": [
          "object",
          "data",
          "has_more"
        ],
        "title": "ListBatchesResponse",
        "description": "Response containing a list of batch objects."
      },
      "CreateBatchRequest": {
        "type": "object",
        "properties": {
          "input_file_id": {
            "type": "string",
            "description": "The ID of an uploaded file containing requests for the batch."
          },
          "endpoint": {
            "type": "string",
            "description": "The endpoint to be used for all requests in the batch."
          },
          "completion_window": {
            "type": "string",
            "const": "24h",
            "description": "The time window within which the batch should be processed."
          },
          "metadata": {
            "type": "object",
            "additionalProperties": {
              "type": "string"
            },
            "description": "Optional metadata for the batch."
          },
          "idempotency_key": {
            "type": "string",
            "description": "Optional idempotency key. When provided, enables idempotent behavior."
          }
        },
        "additionalProperties": false,
        "required": [
          "input_file_id",
          "endpoint",
          "completion_window"
        ],
        "title": "CreateBatchRequest"
      },
      "Batch": {
        "type": "object",
        "properties": {
          "id": {
            "type": "string"
          },
          "completion_window": {
            "type": "string"
          },
          "created_at": {
            "type": "integer"
          },
          "endpoint": {
            "type": "string"
          },
          "input_file_id": {
            "type": "string"
          },
          "object": {
            "type": "string",
            "const": "batch"
          },
          "status": {
            "type": "string",
            "enum": [
              "validating",
              "failed",
              "in_progress",
              "finalizing",
              "completed",
              "expired",
              "cancelling",
              "cancelled"
            ]
          },
          "cancelled_at": {
            "type": "integer"
          },
          "cancelling_at": {
            "type": "integer"
          },
          "completed_at": {
            "type": "integer"
          },
          "error_file_id": {
            "type": "string"
          },
          "errors": {
            "type": "object",
            "properties": {
              "data": {
                "type": "array",
                "items": {
                  "type": "object",
                  "properties": {
                    "code": {
                      "type": "string"
                    },
                    "line": {
                      "type": "integer"
                    },
                    "message": {
                      "type": "string"
                    },
                    "param": {
                      "type": "string"
                    }
                  },
                  "additionalProperties": false,
                  "title": "BatchError"
                }
              },
              "object": {
                "type": "string"
              }
            },
            "additionalProperties": false,
            "title": "Errors"
          },
          "expired_at": {
            "type": "integer"
          },
          "expires_at": {
            "type": "integer"
          },
          "failed_at": {
            "type": "integer"
          },
          "finalizing_at": {
            "type": "integer"
          },
          "in_progress_at": {
            "type": "integer"
          },
          "metadata": {
            "type": "object",
            "additionalProperties": {
              "type": "string"
            }
          },
          "model": {
            "type": "string"
          },
          "output_file_id": {
            "type": "string"
          },
          "request_counts": {
            "type": "object",
            "properties": {
              "completed": {
                "type": "integer"
              },
              "failed": {
                "type": "integer"
              },
              "total": {
                "type": "integer"
              }
            },
            "additionalProperties": false,
            "required": [
              "completed",
              "failed",
              "total"
            ],
            "title": "BatchRequestCounts"
          },
          "usage": {
            "type": "object",
            "properties": {
              "input_tokens": {
                "type": "integer"
              },
              "input_tokens_details": {
                "type": "object",
                "properties": {
                  "cached_tokens": {
                    "type": "integer"
                  }
                },
                "additionalProperties": false,
                "required": [
                  "cached_tokens"
                ],
                "title": "InputTokensDetails"
              },
              "output_tokens": {
                "type": "integer"
              },
              "output_tokens_details": {
                "type": "object",
                "properties": {
                  "reasoning_tokens": {
                    "type": "integer"
                  }
                },
                "additionalProperties": false,
                "required": [
                  "reasoning_tokens"
                ],
                "title": "OutputTokensDetails"
              },
              "total_tokens": {
                "type": "integer"
              }
            },
            "additionalProperties": false,
            "required": [
              "input_tokens",
              "input_tokens_details",
              "output_tokens",
              "output_tokens_details",
              "total_tokens"
            ],
            "title": "BatchUsage"
          }
        },
        "additionalProperties": false,
        "required": [
          "id",
          "completion_window",
          "created_at",
          "endpoint",
          "input_file_id",
          "object",
          "status"
        ],
        "title": "Batch"
      },
      "Order": {
        "type": "string",
        "enum": [

@ -11897,6 +12529,10 @@
            "$ref": "#/components/schemas/InterleavedContent",
            "description": "The content of the chunk, which can be interleaved text, images, or other types."
          },
          "chunk_id": {
            "type": "string",
            "description": "Unique identifier for the chunk. Must be provided explicitly."
          },
          "metadata": {
            "type": "object",
            "additionalProperties": {

@ -11930,10 +12566,6 @@
          },
          "description": "Optional embedding for the chunk. If not provided, it will be computed later."
        },
        "stored_chunk_id": {
          "type": "string",
          "description": "The chunk ID that is stored in the vector database. Used for backend functionality."
        },
        "chunk_metadata": {
          "$ref": "#/components/schemas/ChunkMetadata",
          "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."

@ -11942,6 +12574,7 @@
        "additionalProperties": false,
        "required": [
          "content",
          "chunk_id",
          "metadata"
        ],
        "title": "Chunk",

@ -13288,6 +13921,11 @@
      "description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n - Supports dynamic `vector_store_ids` per call\n - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`",
      "x-displayName": "Agents"
    },
    {
      "name": "Batches",
      "description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
      "x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
    },
    {
      "name": "Conversations",
      "description": "Protocol for conversation management operations.",

@ -13361,6 +13999,7 @@
      "name": "Operations",
      "tags": [
        "Agents",
        "Batches",
        "Conversations",
        "Files",
        "Inference",
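Beyond Batches, this file's diff also tightens the Chunk schema: `chunk_id` is now required and caller-supplied, and the separate `stored_chunk_id` field is gone. A sketch of the payload shape the updated schema implies — field names come from the schema, the values and the use of `uuid4` for ID generation are illustrative assumptions:

```python
import uuid

# Shape implied by the updated Chunk schema above.
chunk = {
    "content": "Llamas are camelids native to South America.",
    "chunk_id": str(uuid.uuid4()),        # must now be provided explicitly
    "metadata": {"document_id": "doc-1"},  # used in context during inference
}
```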
483
docs/static/llama-stack-spec.yaml
vendored
483
docs/static/llama-stack-spec.yaml
vendored

@ -12,6 +12,141 @@ info:
servers:
  - url: http://any-hosted-llama-stack.com
paths:
  /v1/batches:
    get:
      responses:
        '200':
          description: A list of batch objects.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListBatchesResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: List all batches for the current user.
      description: List all batches for the current user.
      parameters:
        - name: after
          in: query
          description: >-
            A cursor for pagination; returns batches after this batch ID.
          required: false
          schema:
            type: string
        - name: limit
          in: query
          description: >-
            Number of batches to return (default 20, max 100).
          required: true
          schema:
            type: integer
      deprecated: false
    post:
      responses:
        '200':
          description: The created batch object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: >-
        Create a new batch for processing multiple API requests.
      description: >-
        Create a new batch for processing multiple API requests.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateBatchRequest'
        required: true
      deprecated: false
  /v1/batches/{batch_id}:
    get:
      responses:
        '200':
          description: The batch object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: >-
        Retrieve information about a specific batch.
      description: >-
        Retrieve information about a specific batch.
      parameters:
        - name: batch_id
          in: path
          description: The ID of the batch to retrieve.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/batches/{batch_id}/cancel:
    post:
      responses:
        '200':
          description: The updated batch object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: Cancel a batch that is in progress.
      description: Cancel a batch that is in progress.
      parameters:
        - name: batch_id
          in: path
          description: The ID of the batch to cancel.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/chat/completions:
    get:
      responses:

@ -2999,6 +3134,331 @@ components:
      title: Error
      description: >-
        Error response from the API. Roughly follows RFC 7807.
    ListBatchesResponse:
      type: object
      properties:
        object:
          type: string
          const: list
          default: list
        data:
          type: array
          items:
            type: object
            properties:
              id:
                type: string
              completion_window:
                type: string
              created_at:
                type: integer
              endpoint:
                type: string
              input_file_id:
                type: string
              object:
                type: string
                const: batch
              status:
                type: string
                enum:
                  - validating
                  - failed
                  - in_progress
                  - finalizing
                  - completed
                  - expired
                  - cancelling
                  - cancelled
              cancelled_at:
                type: integer
              cancelling_at:
                type: integer
              completed_at:
                type: integer
              error_file_id:
                type: string
              errors:
                type: object
                properties:
                  data:
                    type: array
                    items:
                      type: object
                      properties:
                        code:
                          type: string
                        line:
                          type: integer
                        message:
                          type: string
                        param:
                          type: string
                      additionalProperties: false
                      title: BatchError
                  object:
                    type: string
                additionalProperties: false
                title: Errors
              expired_at:
                type: integer
              expires_at:
                type: integer
              failed_at:
                type: integer
              finalizing_at:
                type: integer
              in_progress_at:
                type: integer
              metadata:
                type: object
                additionalProperties:
                  type: string
              model:
                type: string
              output_file_id:
                type: string
              request_counts:
                type: object
                properties:
                  completed:
                    type: integer
                  failed:
                    type: integer
                  total:
                    type: integer
                additionalProperties: false
                required:
                  - completed
                  - failed
                  - total
                title: BatchRequestCounts
              usage:
                type: object
                properties:
                  input_tokens:
                    type: integer
                  input_tokens_details:
                    type: object
                    properties:
                      cached_tokens:
                        type: integer
                    additionalProperties: false
                    required:
                      - cached_tokens
                    title: InputTokensDetails
                  output_tokens:
                    type: integer
                  output_tokens_details:
                    type: object
                    properties:
                      reasoning_tokens:
                        type: integer
                    additionalProperties: false
                    required:
                      - reasoning_tokens
                    title: OutputTokensDetails
                  total_tokens:
                    type: integer
                additionalProperties: false
                required:
                  - input_tokens
                  - input_tokens_details
                  - output_tokens
                  - output_tokens_details
                  - total_tokens
                title: BatchUsage
            additionalProperties: false
            required:
              - id
              - completion_window
              - created_at
              - endpoint
              - input_file_id
              - object
              - status
            title: Batch
        first_id:
          type: string
        last_id:
          type: string
        has_more:
          type: boolean
          default: false
      additionalProperties: false
      required:
        - object
        - data
        - has_more
      title: ListBatchesResponse
      description: >-
        Response containing a list of batch objects.
    CreateBatchRequest:
      type: object
      properties:
        input_file_id:
          type: string
          description: >-
            The ID of an uploaded file containing requests for the batch.
        endpoint:
          type: string
          description: >-
            The endpoint to be used for all requests in the batch.
        completion_window:
          type: string
          const: 24h
          description: >-
            The time window within which the batch should be processed.
        metadata:
          type: object
          additionalProperties:
            type: string
          description: Optional metadata for the batch.
        idempotency_key:
          type: string
          description: >-
            Optional idempotency key. When provided, enables idempotent behavior.
      additionalProperties: false
      required:
        - input_file_id
        - endpoint
        - completion_window
      title: CreateBatchRequest
    Batch:
      type: object
      properties:
        id:
          type: string
        completion_window:
          type: string
        created_at:
          type: integer
        endpoint:
          type: string
        input_file_id:
          type: string
        object:
          type: string
          const: batch
        status:
          type: string
          enum:
            - validating
            - failed
            - in_progress
            - finalizing
            - completed
            - expired
            - cancelling
            - cancelled
        cancelled_at:
          type: integer
        cancelling_at:
          type: integer
        completed_at:
          type: integer
        error_file_id:
          type: string
        errors:
          type: object
          properties:
            data:
              type: array
              items:
                type: object
                properties:
                  code:
                    type: string
                  line:
                    type: integer
                  message:
                    type: string
                  param:
                    type: string
                additionalProperties: false
                title: BatchError
            object:
              type: string
          additionalProperties: false
          title: Errors
        expired_at:
          type: integer
        expires_at:
          type: integer
        failed_at:
          type: integer
        finalizing_at:
          type: integer
        in_progress_at:
          type: integer
        metadata:
          type: object
          additionalProperties:
            type: string
        model:
          type: string
        output_file_id:
          type: string
        request_counts:
          type: object
          properties:
            completed:
              type: integer
            failed:
              type: integer
            total:
              type: integer
          additionalProperties: false
          required:
            - completed
            - failed
            - total
          title: BatchRequestCounts
        usage:
          type: object
          properties:
            input_tokens:
              type: integer
            input_tokens_details:
              type: object
              properties:
                cached_tokens:
                  type: integer
              additionalProperties: false
              required:
                - cached_tokens
              title: InputTokensDetails
            output_tokens:
              type: integer
            output_tokens_details:
              type: object
              properties:
                reasoning_tokens:
                  type: integer
              additionalProperties: false
              required:
                - reasoning_tokens
              title: OutputTokensDetails
            total_tokens:
              type: integer
          additionalProperties: false
          required:
            - input_tokens
            - input_tokens_details
            - output_tokens
            - output_tokens_details
            - total_tokens
          title: BatchUsage
      additionalProperties: false
      required:
        - id
        - completion_window
        - created_at
        - endpoint
        - input_file_id
        - object
        - status
      title: Batch
    Order:
      type: string
      enum:

@ -9045,6 +9505,10 @@ components:
          description: >-
            The content of the chunk, which can be interleaved text, images, or other
            types.
        chunk_id:
          type: string
          description: >-
            Unique identifier for the chunk. Must be provided explicitly.
        metadata:
          type: object
          additionalProperties:

@ -9065,10 +9529,6 @@ components:
          description: >-
            Optional embedding for the chunk. If not provided, it will be computed
            later.
        stored_chunk_id:
          type: string
          description: >-
            The chunk ID that is stored in the vector database. Used for backend functionality.
        chunk_metadata:
          $ref: '#/components/schemas/ChunkMetadata'
          description: >-

@ -9077,6 +9537,7 @@ components:
      additionalProperties: false
      required:
        - content
        - chunk_id
        - metadata
      title: Chunk
      description: >-

@ -10143,6 +10604,19 @@ tags:
      - `background`
    x-displayName: Agents
  - name: Batches
    description: >-
      The API is designed to allow use of openai client libraries for seamless integration.


      This API provides the following extensions:
       - idempotent batch creation

      Note: This API is currently under active development and may undergo changes.
    x-displayName: >-
      The Batches API enables efficient processing of multiple requests in a single
      operation, particularly useful for processing large datasets, batch evaluation
      workflows, and cost-effective inference at scale.
  - name: Conversations
    description: >-
      Protocol for conversation management operations.

@ -10205,6 +10679,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
      - Batches
      - Conversations
      - Files
      - Inference
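The `idempotency_key` field on CreateBatchRequest is the extension the Batches tag description advertises as "idempotent batch creation". The upstream openai-python client does not expose it in `batches.create`'s signature, so one way to pass it is the client's standard `extra_body` escape hatch — a sketch, with the file ID, key value, and server URL as placeholders:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# Retrying this call with the same key should return the same batch instead of
# creating a duplicate, per the `idempotency_key` description above.
batch = client.batches.create(
    input_file_id="file-abc123",  # placeholder file ID
    endpoint="/v1/chat/completions",
    completion_window="24h",
    extra_body={"idempotency_key": "nightly-eval-2025-11-30"},
)
```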
647
docs/static/stainless-llama-stack-spec.html
vendored
647
docs/static/stainless-llama-stack-spec.html
vendored

@ -40,6 +40,193 @@
      }
    ],
    "paths": {
      "/v1/batches": {
        "get": {
          "responses": {
            "200": {
              "description": "A list of batch objects.",
              "content": {
                "application/json": {
                  "schema": {
                    "$ref": "#/components/schemas/ListBatchesResponse"
                  }
                }
              }
            },
            "400": {
              "$ref": "#/components/responses/BadRequest400"
            },
            "429": {
              "$ref": "#/components/responses/TooManyRequests429"
            },
            "500": {
              "$ref": "#/components/responses/InternalServerError500"
            },
            "default": {
              "$ref": "#/components/responses/DefaultError"
            }
          },
          "tags": [
            "Batches"
          ],
          "summary": "List all batches for the current user.",
          "description": "List all batches for the current user.",
          "parameters": [
            {
              "name": "after",
              "in": "query",
              "description": "A cursor for pagination; returns batches after this batch ID.",
              "required": false,
              "schema": {
                "type": "string"
              }
            },
            {
              "name": "limit",
              "in": "query",
              "description": "Number of batches to return (default 20, max 100).",
              "required": true,
              "schema": {
                "type": "integer"
              }
            }
          ],
          "deprecated": false
        },
        "post": {
          "responses": {
            "200": {
              "description": "The created batch object.",
              "content": {
                "application/json": {
                  "schema": {
                    "$ref": "#/components/schemas/Batch"
                  }
                }
              }
            },
            "400": {
              "$ref": "#/components/responses/BadRequest400"
            },
            "429": {
              "$ref": "#/components/responses/TooManyRequests429"
            },
            "500": {
              "$ref": "#/components/responses/InternalServerError500"
            },
            "default": {
              "$ref": "#/components/responses/DefaultError"
            }
          },
          "tags": [
            "Batches"
          ],
          "summary": "Create a new batch for processing multiple API requests.",
          "description": "Create a new batch for processing multiple API requests.",
          "parameters": [],
          "requestBody": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CreateBatchRequest"
                }
              }
            },
            "required": true
          },
          "deprecated": false
        }
      },
      "/v1/batches/{batch_id}": {
        "get": {
          "responses": {
            "200": {
              "description": "The batch object.",
              "content": {
                "application/json": {
                  "schema": {
                    "$ref": "#/components/schemas/Batch"
                  }
                }
              }
            },
            "400": {
              "$ref": "#/components/responses/BadRequest400"
            },
            "429": {
              "$ref": "#/components/responses/TooManyRequests429"
            },
            "500": {
              "$ref": "#/components/responses/InternalServerError500"
            },
            "default": {
              "$ref": "#/components/responses/DefaultError"
            }
          },
          "tags": [
            "Batches"
          ],
          "summary": "Retrieve information about a specific batch.",
          "description": "Retrieve information about a specific batch.",
          "parameters": [
            {
              "name": "batch_id",
              "in": "path",
              "description": "The ID of the batch to retrieve.",
              "required": true,
              "schema": {
                "type": "string"
              }
            }
          ],
          "deprecated": false
        }
      },
      "/v1/batches/{batch_id}/cancel": {
        "post": {
          "responses": {
            "200": {
              "description": "The updated batch object.",
              "content": {
                "application/json": {
                  "schema": {
                    "$ref": "#/components/schemas/Batch"
                  }
                }
              }
            },
            "400": {
              "$ref": "#/components/responses/BadRequest400"
            },
            "429": {
              "$ref": "#/components/responses/TooManyRequests429"
            },
            "500": {
              "$ref": "#/components/responses/InternalServerError500"
            },
            "default": {
              "$ref": "#/components/responses/DefaultError"
            }
          },
          "tags": [
            "Batches"
          ],
          "summary": "Cancel a batch that is in progress.",
          "description": "Cancel a batch that is in progress.",
          "parameters": [
            {
              "name": "batch_id",
              "in": "path",
              "description": "The ID of the batch to cancel.",
              "required": true,
              "schema": {
                "type": "string"
              }
            }
          ],
          "deprecated": false
        }
      },
      "/v1/chat/completions": {
        "get": {
          "responses": {

@ -5677,6 +5864,451 @@
        "title": "Error",
        "description": "Error response from the API. Roughly follows RFC 7807."
      },
      "ListBatchesResponse": {
        "type": "object",
        "properties": {
          "object": {
            "type": "string",
            "const": "list",
            "default": "list"
          },
          "data": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "id": {
                  "type": "string"
                },
                "completion_window": {
                  "type": "string"
                },
                "created_at": {
                  "type": "integer"
                },
                "endpoint": {
                  "type": "string"
                },
                "input_file_id": {
                  "type": "string"
                },
                "object": {
                  "type": "string",
                  "const": "batch"
                },
                "status": {
                  "type": "string",
                  "enum": [
                    "validating",
                    "failed",
                    "in_progress",
                    "finalizing",
                    "completed",
                    "expired",
                    "cancelling",
                    "cancelled"
                  ]
                },
                "cancelled_at": {
                  "type": "integer"
                },
                "cancelling_at": {
                  "type": "integer"
                },
                "completed_at": {
                  "type": "integer"
                },
                "error_file_id": {
                  "type": "string"
                },
                "errors": {
                  "type": "object",
                  "properties": {
                    "data": {
                      "type": "array",
                      "items": {
                        "type": "object",
                        "properties": {
                          "code": {
                            "type": "string"
                          },
                          "line": {
                            "type": "integer"
                          },
                          "message": {
                            "type": "string"
                          },
                          "param": {
                            "type": "string"
                          }
                        },
                        "additionalProperties": false,
                        "title": "BatchError"
                      }
                    },
                    "object": {
                      "type": "string"
                    }
                  },
                  "additionalProperties": false,
                  "title": "Errors"
                },
                "expired_at": {
                  "type": "integer"
                },
                "expires_at": {
                  "type": "integer"
                },
                "failed_at": {
                  "type": "integer"
                },
                "finalizing_at": {
                  "type": "integer"
                },
                "in_progress_at": {
                  "type": "integer"
                },
                "metadata": {
                  "type": "object",
                  "additionalProperties": {
                    "type": "string"
                  }
                },
                "model": {
                  "type": "string"
                },
                "output_file_id": {
                  "type": "string"
                },
                "request_counts": {
                  "type": "object",
                  "properties": {
                    "completed": {
                      "type": "integer"
                    },
                    "failed": {
                      "type": "integer"
                    },
                    "total": {
                      "type": "integer"
                    }
                  },
                  "additionalProperties": false,
                  "required": [
                    "completed",
                    "failed",
                    "total"
                  ],
                  "title": "BatchRequestCounts"
                },
                "usage": {
                  "type": "object",
                  "properties": {
                    "input_tokens": {
                      "type": "integer"
                    },
                    "input_tokens_details": {
                      "type": "object",
                      "properties": {
                        "cached_tokens": {
                          "type": "integer"
                        }
                      },
                      "additionalProperties": false,
                      "required": [
                        "cached_tokens"
                      ],
                      "title": "InputTokensDetails"
                    },
                    "output_tokens": {
                      "type": "integer"
                    },
                    "output_tokens_details": {
                      "type": "object",
                      "properties": {
                        "reasoning_tokens": {
                          "type": "integer"
                        }
                      },
                      "additionalProperties": false,
                      "required": [
                        "reasoning_tokens"
                      ],
                      "title": "OutputTokensDetails"
                    },
                    "total_tokens": {
                      "type": "integer"
                    }
                  },
                  "additionalProperties": false,
                  "required": [
                    "input_tokens",
                    "input_tokens_details",
                    "output_tokens",
                    "output_tokens_details",
                    "total_tokens"
                  ],
                  "title": "BatchUsage"
                }
              },
              "additionalProperties": false,
              "required": [
                "id",
                "completion_window",
                "created_at",
                "endpoint",
                "input_file_id",
                "object",
                "status"
              ],
              "title": "Batch"
            }
          },
          "first_id": {
            "type": "string"
          },
          "last_id": {
            "type": "string"
          },
          "has_more": {
            "type": "boolean",
            "default": false
          }
        },
        "additionalProperties": false,
        "required": [
          "object",
          "data",
          "has_more"
        ],
        "title": "ListBatchesResponse",
        "description": "Response containing a list of batch objects."
      },
      "CreateBatchRequest": {
        "type": "object",
        "properties": {
          "input_file_id": {
            "type": "string",
            "description": "The ID of an uploaded file containing requests for the batch."
          },
          "endpoint": {
            "type": "string",
            "description": "The endpoint to be used for all requests in the batch."
          },
          "completion_window": {
            "type": "string",
            "const": "24h",
            "description": "The time window within which the batch should be processed."
          },
          "metadata": {
            "type": "object",
            "additionalProperties": {
              "type": "string"
            },
            "description": "Optional metadata for the batch."
          },
          "idempotency_key": {
            "type": "string",
            "description": "Optional idempotency key. When provided, enables idempotent behavior."
          }
        },
        "additionalProperties": false,
        "required": [
          "input_file_id",
          "endpoint",
          "completion_window"
        ],
        "title": "CreateBatchRequest"
      },
      "Batch": {
        "type": "object",
        "properties": {
          "id": {
            "type": "string"
          },
          "completion_window": {
            "type": "string"
          },
          "created_at": {
            "type": "integer"
          },
          "endpoint": {
            "type": "string"
          },
          "input_file_id": {
            "type": "string"
          },
          "object": {
            "type": "string",
            "const": "batch"
          },
          "status": {
            "type": "string",
            "enum": [
              "validating",
              "failed",
              "in_progress",
              "finalizing",
              "completed",
              "expired",
              "cancelling",
              "cancelled"
            ]
          },
          "cancelled_at": {
            "type": "integer"
          },
          "cancelling_at": {
            "type": "integer"
          },
          "completed_at": {
            "type": "integer"
          },
          "error_file_id": {
            "type": "string"
          },
          "errors": {
            "type": "object",
            "properties": {
              "data": {
                "type": "array",
                "items": {
                  "type": "object",
                  "properties": {
                    "code": {
                      "type": "string"
                    },
                    "line": {
                      "type": "integer"
                    },
                    "message": {
                      "type": "string"
                    },
                    "param": {
                      "type": "string"
                    }
                  },
                  "additionalProperties": false,
                  "title": "BatchError"
                }
              },
              "object": {
                "type": "string"
              }
            },
            "additionalProperties": false,
            "title": "Errors"
          },
          "expired_at": {
            "type": "integer"
          },
          "expires_at": {
            "type": "integer"
          },
          "failed_at": {
            "type": "integer"
          },
          "finalizing_at": {
            "type": "integer"
          },
          "in_progress_at": {
            "type": "integer"
          },
          "metadata": {
            "type": "object",
            "additionalProperties": {
              "type": "string"
            }
          },
          "model": {
            "type": "string"
          },
          "output_file_id": {
            "type": "string"
          },
          "request_counts": {
            "type": "object",
            "properties": {
              "completed": {
                "type": "integer"
              },
              "failed": {
                "type": "integer"
              },
              "total": {
                "type": "integer"
              }
            },
            "additionalProperties": false,
            "required": [
              "completed",
              "failed",
              "total"
            ],
            "title": "BatchRequestCounts"
          },
          "usage": {
            "type": "object",
            "properties": {
              "input_tokens": {
                "type": "integer"
              },
              "input_tokens_details": {
                "type": "object",
                "properties": {
                  "cached_tokens": {
                    "type": "integer"
                  }
                },
                "additionalProperties": false,
                "required": [
                  "cached_tokens"
                ],
                "title": "InputTokensDetails"
              },
              "output_tokens": {
                "type": "integer"
              },
              "output_tokens_details": {
                "type": "object",
                "properties": {
                  "reasoning_tokens": {
                    "type": "integer"
                  }
                },
                "additionalProperties": false,
                "required": [
                  "reasoning_tokens"
                ],
                "title": "OutputTokensDetails"
              },
              "total_tokens": {
                "type": "integer"
              }
            },
            "additionalProperties": false,
            "required": [
              "input_tokens",
              "input_tokens_details",
              "output_tokens",
              "output_tokens_details",
              "total_tokens"
            ],
            "title": "BatchUsage"
          }
        },
        "additionalProperties": false,
        "required": [
          "id",
          "completion_window",
          "created_at",
          "endpoint",
          "input_file_id",
          "object",
          "status"
        ],
        "title": "Batch"
      },
      "Order": {
        "type": "string",
        "enum": [

@ -13569,6 +14201,10 @@
            "$ref": "#/components/schemas/InterleavedContent",
            "description": "The content of the chunk, which can be interleaved text, images, or other types."
          },
          "chunk_id": {
            "type": "string",
            "description": "Unique identifier for the chunk. Must be provided explicitly."
          },
          "metadata": {
            "type": "object",
            "additionalProperties": {

@ -13602,10 +14238,6 @@
          },
          "description": "Optional embedding for the chunk. If not provided, it will be computed later."
        },
        "stored_chunk_id": {
          "type": "string",
          "description": "The chunk ID that is stored in the vector database. Used for backend functionality."
        },
        "chunk_metadata": {
          "$ref": "#/components/schemas/ChunkMetadata",
          "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."

@ -13614,6 +14246,7 @@
        "additionalProperties": false,
        "required": [
          "content",
          "chunk_id",
          "metadata"
        ],
        "title": "Chunk",

@ -17960,6 +18593,11 @@
      "description": "APIs for creating and interacting with agentic systems.",
      "x-displayName": "Agents"
    },
    {
      "name": "Batches",
      "description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
      "x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
    },
    {
      "name": "Benchmarks",
      "description": ""

@ -18054,6 +18692,7 @@
      "name": "Operations",
      "tags": [
        "Agents",
        "Batches",
        "Benchmarks",
        "Conversations",
        "DatasetIO",
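ListBatchesResponse pairs `data` with `first_id`, `last_id`, and `has_more`, and the list endpoint takes `after` and `limit`, which together describe cursor pagination. A sketch of walking every page — the `getattr` guard is defensive because the client's page type is an assumption here:

```python
def iter_batches(client, page_size: int = 20):
    """Walk GET /v1/batches using the `after` cursor until `has_more` is false."""
    after = None
    while True:
        kwargs = {"limit": page_size}
        if after is not None:
            kwargs["after"] = after
        page = client.batches.list(**kwargs)
        yield from page.data
        if not getattr(page, "has_more", False):
            break
        after = page.data[-1].id  # equivalently, the response's last_id
```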
483
docs/static/stainless-llama-stack-spec.yaml
vendored
483
docs/static/stainless-llama-stack-spec.yaml
vendored
|
|
@@ -15,6 +15,141 @@ info:
servers:
  - url: http://any-hosted-llama-stack.com
paths:
  /v1/batches:
    get:
      responses:
        '200':
          description: A list of batch objects.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListBatchesResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: List all batches for the current user.
      description: List all batches for the current user.
      parameters:
        - name: after
          in: query
          description: >-
            A cursor for pagination; returns batches after this batch ID.
          required: false
          schema:
            type: string
        - name: limit
          in: query
          description: >-
            Number of batches to return (default 20, max 100).
          required: true
          schema:
            type: integer
      deprecated: false
    post:
      responses:
        '200':
          description: The created batch object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: >-
        Create a new batch for processing multiple API requests.
      description: >-
        Create a new batch for processing multiple API requests.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateBatchRequest'
        required: true
      deprecated: false
  /v1/batches/{batch_id}:
    get:
      responses:
        '200':
          description: The batch object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: >-
        Retrieve information about a specific batch.
      description: >-
        Retrieve information about a specific batch.
      parameters:
        - name: batch_id
          in: path
          description: The ID of the batch to retrieve.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/batches/{batch_id}/cancel:
    post:
      responses:
        '200':
          description: The updated batch object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: Cancel a batch that is in progress.
      description: Cancel a batch that is in progress.
      parameters:
        - name: batch_id
          in: path
          description: The ID of the batch to cancel.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/chat/completions:
    get:
      responses:
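
A small sketch of paging through GET /v1/batches using the after cursor and the ListBatchesResponse fields defined below; host and port are assumptions:

import httpx

base = "http://localhost:8321/v1"
params: dict = {"limit": 20}
while True:
    page = httpx.get(f"{base}/batches", params=params).json()
    for batch in page["data"]:
        print(batch["id"], batch["status"])
    if not page["has_more"]:
        break
    params["after"] = page["last_id"]  # cursor for the next page
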
@@ -4212,6 +4347,331 @@ components:
      title: Error
      description: >-
        Error response from the API. Roughly follows RFC 7807.
    ListBatchesResponse:
      type: object
      properties:
        object:
          type: string
          const: list
          default: list
        data:
          type: array
          items:
            type: object
            properties:
              id:
                type: string
              completion_window:
                type: string
              created_at:
                type: integer
              endpoint:
                type: string
              input_file_id:
                type: string
              object:
                type: string
                const: batch
              status:
                type: string
                enum:
                  - validating
                  - failed
                  - in_progress
                  - finalizing
                  - completed
                  - expired
                  - cancelling
                  - cancelled
              cancelled_at:
                type: integer
              cancelling_at:
                type: integer
              completed_at:
                type: integer
              error_file_id:
                type: string
              errors:
                type: object
                properties:
                  data:
                    type: array
                    items:
                      type: object
                      properties:
                        code:
                          type: string
                        line:
                          type: integer
                        message:
                          type: string
                        param:
                          type: string
                      additionalProperties: false
                      title: BatchError
                  object:
                    type: string
                additionalProperties: false
                title: Errors
              expired_at:
                type: integer
              expires_at:
                type: integer
              failed_at:
                type: integer
              finalizing_at:
                type: integer
              in_progress_at:
                type: integer
              metadata:
                type: object
                additionalProperties:
                  type: string
              model:
                type: string
              output_file_id:
                type: string
              request_counts:
                type: object
                properties:
                  completed:
                    type: integer
                  failed:
                    type: integer
                  total:
                    type: integer
                additionalProperties: false
                required:
                  - completed
                  - failed
                  - total
                title: BatchRequestCounts
              usage:
                type: object
                properties:
                  input_tokens:
                    type: integer
                  input_tokens_details:
                    type: object
                    properties:
                      cached_tokens:
                        type: integer
                    additionalProperties: false
                    required:
                      - cached_tokens
                    title: InputTokensDetails
                  output_tokens:
                    type: integer
                  output_tokens_details:
                    type: object
                    properties:
                      reasoning_tokens:
                        type: integer
                    additionalProperties: false
                    required:
                      - reasoning_tokens
                    title: OutputTokensDetails
                  total_tokens:
                    type: integer
                additionalProperties: false
                required:
                  - input_tokens
                  - input_tokens_details
                  - output_tokens
                  - output_tokens_details
                  - total_tokens
                title: BatchUsage
            additionalProperties: false
            required:
              - id
              - completion_window
              - created_at
              - endpoint
              - input_file_id
              - object
              - status
            title: Batch
        first_id:
          type: string
        last_id:
          type: string
        has_more:
          type: boolean
          default: false
      additionalProperties: false
      required:
        - object
        - data
        - has_more
      title: ListBatchesResponse
      description: >-
        Response containing a list of batch objects.
    CreateBatchRequest:
      type: object
      properties:
        input_file_id:
          type: string
          description: >-
            The ID of an uploaded file containing requests for the batch.
        endpoint:
          type: string
          description: >-
            The endpoint to be used for all requests in the batch.
        completion_window:
          type: string
          const: 24h
          description: >-
            The time window within which the batch should be processed.
        metadata:
          type: object
          additionalProperties:
            type: string
          description: Optional metadata for the batch.
        idempotency_key:
          type: string
          description: >-
            Optional idempotency key. When provided, enables idempotent behavior.
      additionalProperties: false
      required:
        - input_file_id
        - endpoint
        - completion_window
      title: CreateBatchRequest
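
A request body satisfying CreateBatchRequest, shown as a Python dict; the IDs are placeholders:

create_batch_request = {
    "input_file_id": "file-abc123",      # required: uploaded JSONL file of requests
    "endpoint": "/v1/chat/completions",  # required: target endpoint for every request
    "completion_window": "24h",          # required: the schema only allows this const
    "metadata": {"project": "demo"},     # optional string-to-string map
    "idempotency_key": "demo-2025-01",   # optional Llama Stack extension
}
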
    Batch:
      type: object
      properties:
        id:
          type: string
        completion_window:
          type: string
        created_at:
          type: integer
        endpoint:
          type: string
        input_file_id:
          type: string
        object:
          type: string
          const: batch
        status:
          type: string
          enum:
            - validating
            - failed
            - in_progress
            - finalizing
            - completed
            - expired
            - cancelling
            - cancelled
        cancelled_at:
          type: integer
        cancelling_at:
          type: integer
        completed_at:
          type: integer
        error_file_id:
          type: string
        errors:
          type: object
          properties:
            data:
              type: array
              items:
                type: object
                properties:
                  code:
                    type: string
                  line:
                    type: integer
                  message:
                    type: string
                  param:
                    type: string
                additionalProperties: false
                title: BatchError
            object:
              type: string
          additionalProperties: false
          title: Errors
        expired_at:
          type: integer
        expires_at:
          type: integer
        failed_at:
          type: integer
        finalizing_at:
          type: integer
        in_progress_at:
          type: integer
        metadata:
          type: object
          additionalProperties:
            type: string
        model:
          type: string
        output_file_id:
          type: string
        request_counts:
          type: object
          properties:
            completed:
              type: integer
            failed:
              type: integer
            total:
              type: integer
          additionalProperties: false
          required:
            - completed
            - failed
            - total
          title: BatchRequestCounts
        usage:
          type: object
          properties:
            input_tokens:
              type: integer
            input_tokens_details:
              type: object
              properties:
                cached_tokens:
                  type: integer
              additionalProperties: false
              required:
                - cached_tokens
              title: InputTokensDetails
            output_tokens:
              type: integer
            output_tokens_details:
              type: object
              properties:
                reasoning_tokens:
                  type: integer
              additionalProperties: false
              required:
                - reasoning_tokens
              title: OutputTokensDetails
            total_tokens:
              type: integer
          additionalProperties: false
          required:
            - input_tokens
            - input_tokens_details
            - output_tokens
            - output_tokens_details
            - total_tokens
          title: BatchUsage
      additionalProperties: false
      required:
        - id
        - completion_window
        - created_at
        - endpoint
        - input_file_id
        - object
        - status
      title: Batch
    Order:
      type: string
      enum:
@@ -10258,6 +10718,10 @@ components:
          description: >-
            The content of the chunk, which can be interleaved text, images, or other
            types.
        chunk_id:
          type: string
          description: >-
            Unique identifier for the chunk. Must be provided explicitly.
        metadata:
          type: object
          additionalProperties:
@@ -10278,10 +10742,6 @@ components:
          description: >-
            Optional embedding for the chunk. If not provided, it will be computed
            later.
        stored_chunk_id:
          type: string
          description: >-
            The chunk ID that is stored in the vector database. Used for backend functionality.
        chunk_metadata:
          $ref: '#/components/schemas/ChunkMetadata'
          description: >-
@@ -10290,6 +10750,7 @@ components:
      additionalProperties: false
      required:
        - content
        - chunk_id
        - metadata
      title: Chunk
      description: >-
@@ -13527,6 +13988,19 @@ tags:
    description: >-
      APIs for creating and interacting with agentic systems.
    x-displayName: Agents
  - name: Batches
    description: >-
      The API is designed to allow use of openai client libraries for seamless integration.


      This API provides the following extensions:
       - idempotent batch creation

      Note: This API is currently under active development and may undergo changes.
    x-displayName: >-
      The Batches API enables efficient processing of multiple requests in a single
      operation, particularly useful for processing large datasets, batch evaluation
      workflows, and cost-effective inference at scale.
  - name: Benchmarks
    description: ''
  - name: Conversations
@@ -13601,6 +14075,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
      - Batches
      - Benchmarks
      - Conversations
      - DatasetIO

@@ -285,7 +285,6 @@ exclude = [
    "^src/llama_stack/models/llama/llama3/interface\\.py$",
    "^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
    "^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
    "^src/llama_stack/providers/inline/agents/meta_reference/",
    "^src/llama_stack/providers/inline/datasetio/localfs/",
    "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
    "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",

@@ -313,8 +313,20 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
  fi
  echo "Using image: $IMAGE_NAME"

  docker run -d --network host --name "$container_name" \
    -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  # On macOS/Darwin, --network host doesn't work as expected due to Docker running in a VM
  # Use regular port mapping instead
  NETWORK_MODE=""
  PORT_MAPPINGS=""
  if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
    NETWORK_MODE="--network host"
  else
    # On non-Linux (macOS, Windows), need explicit port mappings for both app and telemetry
    PORT_MAPPINGS="-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT -p $COLLECTOR_PORT:$COLLECTOR_PORT"
    echo "Using bridge networking with port mapping (non-Linux)"
  fi

  docker run -d $NETWORK_MODE --name "$container_name" \
    $PORT_MAPPINGS \
    $DOCKER_ENV_VARS \
    "$IMAGE_NAME" \
    --port $LLAMA_STACK_PORT

@@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from collections.abc import Sequence
from typing import Annotated, Any, Literal

from pydantic import BaseModel, Field, model_validator
@@ -202,7 +203,7 @@ class OpenAIResponseMessage(BaseModel):
    scenarios.
    """

    content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]
    content: str | Sequence[OpenAIResponseInputMessageContent] | Sequence[OpenAIResponseOutputMessageContent]
    role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
    type: Literal["message"] = "message"

@@ -254,10 +255,10 @@ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
    """

    id: str
    queries: list[str]
    queries: Sequence[str]
    status: str
    type: Literal["file_search_call"] = "file_search_call"
    results: list[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
    results: Sequence[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None


@json_schema_type
@@ -597,7 +598,7 @@ class OpenAIResponseObject(BaseModel):
    id: str
    model: str
    object: Literal["response"] = "response"
    output: list[OpenAIResponseOutput]
    output: Sequence[OpenAIResponseOutput]
    parallel_tool_calls: bool = False
    previous_response_id: str | None = None
    prompt: OpenAIResponsePrompt | None = None
@@ -607,7 +608,7 @@ class OpenAIResponseObject(BaseModel):
    # before the field was added. New responses will have this set always.
    text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
    top_p: float | None = None
    tools: list[OpenAIResponseTool] | None = None
    tools: Sequence[OpenAIResponseTool] | None = None
    truncation: str | None = None
    usage: OpenAIResponseUsage | None = None
    instructions: str | None = None
@@ -1315,7 +1316,7 @@ class ListOpenAIResponseInputItem(BaseModel):
    :param object: Object type identifier, always "list"
    """

    data: list[OpenAIResponseInput]
    data: Sequence[OpenAIResponseInput]
    object: Literal["list"] = "list"


@@ -1326,7 +1327,7 @@ class OpenAIResponseObjectWithInput(OpenAIResponseObject):
    :param input: List of input items that led to this response
    """

    input: list[OpenAIResponseInput]
    input: Sequence[OpenAIResponseInput]

    def to_response_object(self) -> OpenAIResponseObject:
        """Convert to OpenAIResponseObject by excluding input field."""
@@ -1344,7 +1345,7 @@ class ListOpenAIResponseObject(BaseModel):
    :param object: Object type identifier, always "list"
    """

    data: list[OpenAIResponseObjectWithInput]
    data: Sequence[OpenAIResponseObjectWithInput]
    has_more: bool
    first_id: str
    last_id: str
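
The list-to-Sequence changes above are about mypy variance rather than runtime behavior: list is invariant, so a list[Sub] is not accepted where list[Base] is declared, while the read-only Sequence is covariant. A minimal illustration with throwaway names:

from collections.abc import Sequence

class Base: ...
class Sub(Base): ...

def takes_list(xs: list[Base]) -> None: ...
def takes_seq(xs: Sequence[Base]) -> None: ...

subs: list[Sub] = [Sub()]
takes_seq(subs)   # accepted: Sequence is covariant in its element type
takes_list(subs)  # mypy error: list[Sub] is not list[Base]
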

@@ -8,7 +8,6 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import uuid
from typing import Annotated, Any, Literal, Protocol, runtime_checkable

from fastapi import Body
@@ -18,7 +17,6 @@ from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.core.telemetry.trace_protocol import trace_protocol
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
from llama_stack.schema_utils import json_schema_type, webmethod
from llama_stack.strong_typing.schema import register_schema

@@ -61,38 +59,19 @@ class Chunk(BaseModel):
    """
    A chunk of content that can be inserted into a vector database.
    :param content: The content of the chunk, which can be interleaved text, images, or other types.
    :param embedding: Optional embedding for the chunk. If not provided, it will be computed later.
    :param chunk_id: Unique identifier for the chunk. Must be provided explicitly.
    :param metadata: Metadata associated with the chunk that will be used in the model context during inference.
    :param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality.
    :param embedding: Optional embedding for the chunk. If not provided, it will be computed later.
    :param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference.
        The `chunk_metadata` is required backend functionality.
    """

    content: InterleavedContent
    chunk_id: str
    metadata: dict[str, Any] = Field(default_factory=dict)
    embedding: list[float] | None = None
    # The alias parameter serializes the field as "chunk_id" in JSON but keeps the internal name as "stored_chunk_id"
    stored_chunk_id: str | None = Field(default=None, alias="chunk_id")
    chunk_metadata: ChunkMetadata | None = None

    model_config = {"populate_by_name": True}

    def model_post_init(self, __context):
        # Extract chunk_id from metadata if present
        if self.metadata and "chunk_id" in self.metadata:
            self.stored_chunk_id = self.metadata.pop("chunk_id")

    @property
    def chunk_id(self) -> str:
        """Returns the chunk ID, which is either an input `chunk_id` or a generated one if not set."""
        if self.stored_chunk_id:
            return self.stored_chunk_id

        if "document_id" in self.metadata:
            return generate_chunk_id(self.metadata["document_id"], str(self.content))

        return generate_chunk_id(str(uuid.uuid4()), str(self.content))

    @property
    def document_id(self) -> str | None:
        """Returns the document_id from either metadata or chunk_metadata, with metadata taking precedence."""
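
Given the reworked contract above, constructing a chunk now looks roughly like this; the import path and values are assumptions, not part of the diff:

from llama_stack.apis.vector_io import Chunk  # path assumed

chunk = Chunk(
    content="The quick brown fox jumps over the lazy dog.",
    chunk_id="doc-42-chunk-0",           # explicit, per the new contract
    metadata={"document_id": "doc-42"},  # used in the model context
)
# embedding is left as None; providers may compute it later, as the docstring notes
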

@@ -13,6 +13,8 @@ from llama_stack.core.datatypes import (
    ModelWithOwner,
    RegistryEntrySource,
)
from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger

from .common import CommonRoutingTableImpl, lookup_model

@@ -42,11 +44,90 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):

        await self.update_registered_models(provider_id, models)

    async def _get_dynamic_models_from_provider_data(self) -> list[Model]:
        """
        Fetch models from providers that have credentials in the current request's provider_data.

        This allows users to see models available to them from providers that require
        per-request API keys (via X-LlamaStack-Provider-Data header).

        Returns models with fully qualified identifiers (provider_id/model_id) but does NOT
        cache them in the registry since they are user-specific.
        """
        provider_data = PROVIDER_DATA_VAR.get()
        if not provider_data:
            return []

        dynamic_models = []

        for provider_id, provider in self.impls_by_provider_id.items():
            # Check if this provider supports provider_data
            if not isinstance(provider, NeedsRequestProviderData):
                continue

            # Check if provider has a validator (some providers like ollama don't need per-request credentials)
            spec = getattr(provider, "__provider_spec__", None)
            if not spec or not getattr(spec, "provider_data_validator", None):
                continue

            # Validate provider_data silently - we're speculatively checking all providers
            # so validation failures are expected when user didn't provide keys for this provider
            try:
                validator = instantiate_class_type(spec.provider_data_validator)
                validator(**provider_data)
            except Exception:
                # User didn't provide credentials for this provider - skip silently
                continue

            # Validation succeeded! User has credentials for this provider
            # Now try to list models
            try:
                models = await provider.list_models()
                if not models:
                    continue

                # Ensure models have fully qualified identifiers with provider_id prefix
                for model in models:
                    # Only add prefix if model identifier doesn't already have it
                    if not model.identifier.startswith(f"{provider_id}/"):
                        model.identifier = f"{provider_id}/{model.provider_resource_id}"

                    dynamic_models.append(model)

                logger.debug(f"Fetched {len(models)} models from provider {provider_id} using provider_data")

            except Exception as e:
                logger.debug(f"Failed to list models from provider {provider_id} with provider_data: {e}")
                continue

        return dynamic_models

    async def list_models(self) -> ListModelsResponse:
        return ListModelsResponse(data=await self.get_all_with_type("model"))
        # Get models from registry
        registry_models = await self.get_all_with_type("model")

        # Get additional models available via provider_data (user-specific, not cached)
        dynamic_models = await self._get_dynamic_models_from_provider_data()

        # Combine, avoiding duplicates (registry takes precedence)
        registry_identifiers = {m.identifier for m in registry_models}
        unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers]

        return ListModelsResponse(data=registry_models + unique_dynamic_models)

    async def openai_list_models(self) -> OpenAIListModelsResponse:
        models = await self.get_all_with_type("model")
        # Get models from registry
        registry_models = await self.get_all_with_type("model")

        # Get additional models available via provider_data (user-specific, not cached)
        dynamic_models = await self._get_dynamic_models_from_provider_data()

        # Combine, avoiding duplicates (registry takes precedence)
        registry_identifiers = {m.identifier for m in registry_models}
        unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers]

        all_models = registry_models + unique_dynamic_models

        openai_models = [
            OpenAIModel(
                id=model.identifier,
@@ -54,7 +135,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
                created=int(time.time()),
                owned_by="llama_stack",
            )
            for model in models
            for model in all_models
        ]
        return OpenAIListModelsResponse(data=openai_models)
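
A sketch of the per-request credential flow the docstring describes: the header name comes from the docstring, while the payload key, host, and port are illustrative assumptions for an OpenAI-backed provider:

import json
import httpx

headers = {"X-LlamaStack-Provider-Data": json.dumps({"openai_api_key": "sk-..."})}
resp = httpx.get("http://localhost:8321/v1/models", headers=headers)
for model in resp.json()["data"]:
    print(model["id"])  # registry models plus user-specific provider_id/model_id entries
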
@@ -14,6 +14,7 @@ from typing import Any
import yaml

from llama_stack.apis.agents import Agents
from llama_stack.apis.batches import Batches
from llama_stack.apis.benchmarks import Benchmarks
from llama_stack.apis.conversations import Conversations
from llama_stack.apis.datasetio import DatasetIO
@@ -63,6 +64,7 @@ class LlamaStack(
    Providers,
    Inference,
    Agents,
    Batches,
    Safety,
    SyntheticDataGeneration,
    Datasets,

@@ -11,6 +11,7 @@ import uuid
import warnings
from collections.abc import AsyncGenerator
from datetime import UTC, datetime
from typing import Any, cast

import httpx

@@ -125,12 +126,12 @@ class ChatAgent(ShieldRunnerMixin):
        )

    def turn_to_messages(self, turn: Turn) -> list[Message]:
        messages = []
        messages: list[Message] = []

        # NOTE: if a toolcall response is in a step, we do not add it when processing the input messages
        tool_call_ids = set()
        for step in turn.steps:
            if step.step_type == StepType.tool_execution.value:
            if step.step_type == StepType.tool_execution.value and isinstance(step, ToolExecutionStep):
                for response in step.tool_responses:
                    tool_call_ids.add(response.call_id)

@@ -149,9 +150,9 @@ class ChatAgent(ShieldRunnerMixin):
            messages.append(msg)

        for step in turn.steps:
            if step.step_type == StepType.inference.value:
            if step.step_type == StepType.inference.value and isinstance(step, InferenceStep):
                messages.append(step.model_response)
            elif step.step_type == StepType.tool_execution.value:
            elif step.step_type == StepType.tool_execution.value and isinstance(step, ToolExecutionStep):
                for response in step.tool_responses:
                    messages.append(
                        ToolResponseMessage(
@@ -159,8 +160,8 @@ class ChatAgent(ShieldRunnerMixin):
                            content=response.content,
                        )
                    )
            elif step.step_type == StepType.shield_call.value:
                if step.violation:
            elif step.step_type == StepType.shield_call.value and isinstance(step, ShieldCallStep):
                if step.violation and step.violation.user_message:
                    # CompletionMessage itself in the ShieldResponse
                    messages.append(
                        CompletionMessage(
@@ -174,7 +175,7 @@ class ChatAgent(ShieldRunnerMixin):
        return await self.storage.create_session(name)

    async def get_messages_from_turns(self, turns: list[Turn]) -> list[Message]:
        messages = []
        messages: list[Message] = []
        if self.agent_config.instructions != "":
            messages.append(SystemMessage(content=self.agent_config.instructions))

@@ -231,7 +232,9 @@ class ChatAgent(ShieldRunnerMixin):

        steps = []
        messages = await self.get_messages_from_turns(turns)

        if is_resume:
            assert isinstance(request, AgentTurnResumeRequest)
            tool_response_messages = [
                ToolResponseMessage(call_id=x.call_id, content=x.content) for x in request.tool_responses
            ]
@@ -252,42 +255,52 @@ class ChatAgent(ShieldRunnerMixin):
            in_progress_tool_call_step = await self.storage.get_in_progress_tool_call_step(
                request.session_id, request.turn_id
            )
            now = datetime.now(UTC).isoformat()
            now_dt = datetime.now(UTC)
            tool_execution_step = ToolExecutionStep(
                step_id=(in_progress_tool_call_step.step_id if in_progress_tool_call_step else str(uuid.uuid4())),
                turn_id=request.turn_id,
                tool_calls=(in_progress_tool_call_step.tool_calls if in_progress_tool_call_step else []),
                tool_responses=request.tool_responses,
                completed_at=now,
                started_at=(in_progress_tool_call_step.started_at if in_progress_tool_call_step else now),
                completed_at=now_dt,
                started_at=(in_progress_tool_call_step.started_at if in_progress_tool_call_step else now_dt),
            )
            steps.append(tool_execution_step)
            yield AgentTurnResponseStreamChunk(
                event=AgentTurnResponseEvent(
                    payload=AgentTurnResponseStepCompletePayload(
                        step_type=StepType.tool_execution.value,
                        step_type=StepType.tool_execution,
                        step_id=tool_execution_step.step_id,
                        step_details=tool_execution_step,
                    )
                )
            )
            input_messages = last_turn.input_messages
            # Cast needed due to list invariance - last_turn.input_messages is the right type
            input_messages = last_turn.input_messages  # type: ignore[assignment]

            turn_id = request.turn_id
            actual_turn_id = request.turn_id
            start_time = last_turn.started_at
        else:
            assert isinstance(request, AgentTurnCreateRequest)
            messages.extend(request.messages)
            start_time = datetime.now(UTC).isoformat()
            input_messages = request.messages
            start_time = datetime.now(UTC)
            # Cast needed due to list invariance - request.messages is the right type
            input_messages = request.messages  # type: ignore[assignment]
            # Use the generated turn_id from beginning of function
            actual_turn_id = turn_id if turn_id else str(uuid.uuid4())

        output_message = None
        req_documents = request.documents if isinstance(request, AgentTurnCreateRequest) and not is_resume else None
        req_sampling = (
            self.agent_config.sampling_params if self.agent_config.sampling_params is not None else SamplingParams()
        )

        async for chunk in self.run(
            session_id=request.session_id,
            turn_id=turn_id,
            turn_id=actual_turn_id,
            input_messages=messages,
            sampling_params=self.agent_config.sampling_params,
            sampling_params=req_sampling,
            stream=request.stream,
            documents=request.documents if not is_resume else None,
            documents=req_documents,
        ):
            if isinstance(chunk, CompletionMessage):
                output_message = chunk
@@ -295,20 +308,23 @@ class ChatAgent(ShieldRunnerMixin):

            assert isinstance(chunk, AgentTurnResponseStreamChunk), f"Unexpected type {type(chunk)}"
            event = chunk.event
            if event.payload.event_type == AgentTurnResponseEventType.step_complete.value:
                steps.append(event.payload.step_details)
            if event.payload.event_type == AgentTurnResponseEventType.step_complete.value and hasattr(
                event.payload, "step_details"
            ):
                step_details = event.payload.step_details
                steps.append(step_details)

            yield chunk

        assert output_message is not None

        turn = Turn(
            turn_id=turn_id,
            turn_id=actual_turn_id,
            session_id=request.session_id,
            input_messages=input_messages,
            input_messages=input_messages,  # type: ignore[arg-type]
            output_message=output_message,
            started_at=start_time,
            completed_at=datetime.now(UTC).isoformat(),
            completed_at=datetime.now(UTC),
            steps=steps,
        )
        await self.storage.add_turn_to_session(request.session_id, turn)
@@ -345,9 +361,9 @@ class ChatAgent(ShieldRunnerMixin):
        # return a "final value" for the `yield from` statement. we simulate that by yielding a
        # final boolean (to see whether an exception happened) and then explicitly testing for it.

        if len(self.input_shields) > 0:
        if self.input_shields:
            async for res in self.run_multiple_shields_wrapper(
                turn_id, input_messages, self.input_shields, "user-input"
                turn_id, cast(list[OpenAIMessageParam], input_messages), self.input_shields, "user-input"
            ):
                if isinstance(res, bool):
                    return
@@ -374,9 +390,9 @@ class ChatAgent(ShieldRunnerMixin):
        # for output shields run on the full input and output combination
        messages = input_messages + [final_response]

        if len(self.output_shields) > 0:
        if self.output_shields:
            async for res in self.run_multiple_shields_wrapper(
                turn_id, messages, self.output_shields, "assistant-output"
                turn_id, cast(list[OpenAIMessageParam], messages), self.output_shields, "assistant-output"
            ):
                if isinstance(res, bool):
                    return
@@ -388,7 +404,7 @@ class ChatAgent(ShieldRunnerMixin):
    async def run_multiple_shields_wrapper(
        self,
        turn_id: str,
        messages: list[Message],
        messages: list[OpenAIMessageParam],
        shields: list[str],
        touchpoint: str,
    ) -> AsyncGenerator:
@@ -402,12 +418,12 @@ class ChatAgent(ShieldRunnerMixin):
            return

        step_id = str(uuid.uuid4())
        shield_call_start_time = datetime.now(UTC).isoformat()
        shield_call_start_time = datetime.now(UTC)
        try:
            yield AgentTurnResponseStreamChunk(
                event=AgentTurnResponseEvent(
                    payload=AgentTurnResponseStepStartPayload(
                        step_type=StepType.shield_call.value,
                        step_type=StepType.shield_call,
                        step_id=step_id,
                        metadata=dict(touchpoint=touchpoint),
                    )
@@ -419,14 +435,14 @@ class ChatAgent(ShieldRunnerMixin):
            yield AgentTurnResponseStreamChunk(
                event=AgentTurnResponseEvent(
                    payload=AgentTurnResponseStepCompletePayload(
                        step_type=StepType.shield_call.value,
                        step_type=StepType.shield_call,
                        step_id=step_id,
                        step_details=ShieldCallStep(
                            step_id=step_id,
                            turn_id=turn_id,
                            violation=e.violation,
                            started_at=shield_call_start_time,
                            completed_at=datetime.now(UTC).isoformat(),
                            completed_at=datetime.now(UTC),
                        ),
                    )
                )
@@ -443,14 +459,14 @@ class ChatAgent(ShieldRunnerMixin):
            yield AgentTurnResponseStreamChunk(
                event=AgentTurnResponseEvent(
                    payload=AgentTurnResponseStepCompletePayload(
                        step_type=StepType.shield_call.value,
                        step_type=StepType.shield_call,
                        step_id=step_id,
                        step_details=ShieldCallStep(
                            step_id=step_id,
                            turn_id=turn_id,
                            violation=None,
                            started_at=shield_call_start_time,
                            completed_at=datetime.now(UTC).isoformat(),
                            completed_at=datetime.now(UTC),
                        ),
                    )
                )
@@ -496,21 +512,22 @@ class ChatAgent(ShieldRunnerMixin):
            else:
                self.tool_name_to_args[tool_name]["vector_store_ids"].append(session_info.vector_store_id)

        output_attachments = []
        output_attachments: list[Attachment] = []

        n_iter = await self.storage.get_num_infer_iters_in_turn(session_id, turn_id) or 0

        # Build a map of custom tools to their definitions for faster lookup
        client_tools = {}
        for tool in self.agent_config.client_tools:
            client_tools[tool.name] = tool
        if self.agent_config.client_tools:
            for tool in self.agent_config.client_tools:
                client_tools[tool.name] = tool
        while True:
            step_id = str(uuid.uuid4())
            inference_start_time = datetime.now(UTC).isoformat()
            inference_start_time = datetime.now(UTC)
            yield AgentTurnResponseStreamChunk(
                event=AgentTurnResponseEvent(
                    payload=AgentTurnResponseStepStartPayload(
                        step_type=StepType.inference.value,
                        step_type=StepType.inference,
                        step_id=step_id,
                    )
                )
@@ -538,7 +555,7 @@ class ChatAgent(ShieldRunnerMixin):
            else:
                return value

        def _add_type(openai_msg: dict) -> OpenAIMessageParam:
        def _add_type(openai_msg: Any) -> OpenAIMessageParam:
            # Serialize any nested Pydantic models to plain dicts
            openai_msg = _serialize_nested(openai_msg)

@@ -588,7 +605,7 @@ class ChatAgent(ShieldRunnerMixin):
                messages=openai_messages,
                tools=openai_tools if openai_tools else None,
                tool_choice=tool_choice,
                response_format=self.agent_config.response_format,
                response_format=self.agent_config.response_format,  # type: ignore[arg-type]
                temperature=temperature,
                top_p=top_p,
                max_tokens=max_tokens,
@@ -598,7 +615,8 @@ class ChatAgent(ShieldRunnerMixin):

            # Convert OpenAI stream back to Llama Stack format
            response_stream = convert_openai_chat_completion_stream(
                openai_stream, enable_incremental_tool_calls=True
                openai_stream,  # type: ignore[arg-type]
                enable_incremental_tool_calls=True,
            )

            async for chunk in response_stream:
@@ -620,7 +638,7 @@ class ChatAgent(ShieldRunnerMixin):
                    yield AgentTurnResponseStreamChunk(
                        event=AgentTurnResponseEvent(
                            payload=AgentTurnResponseStepProgressPayload(
                                step_type=StepType.inference.value,
                                step_type=StepType.inference,
                                step_id=step_id,
                                delta=delta,
                            )
@@ -633,7 +651,7 @@ class ChatAgent(ShieldRunnerMixin):
                    yield AgentTurnResponseStreamChunk(
                        event=AgentTurnResponseEvent(
                            payload=AgentTurnResponseStepProgressPayload(
                                step_type=StepType.inference.value,
                                step_type=StepType.inference,
                                step_id=step_id,
                                delta=delta,
                            )
@@ -651,7 +669,9 @@ class ChatAgent(ShieldRunnerMixin):
                    output_attr = json.dumps(
                        {
                            "content": content,
                            "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls],
                            "tool_calls": [
                                json.loads(t.model_dump_json()) for t in tool_calls if isinstance(t, ToolCall)
                            ],
                        }
                    )
                    span.set_attribute("output", output_attr)
@@ -667,16 +687,18 @@ class ChatAgent(ShieldRunnerMixin):
            if tool_calls:
                content = ""

            # Filter out string tool calls for CompletionMessage (only keep ToolCall objects)
            valid_tool_calls = [t for t in tool_calls if isinstance(t, ToolCall)]
            message = CompletionMessage(
                content=content,
                stop_reason=stop_reason,
                tool_calls=tool_calls,
                tool_calls=valid_tool_calls if valid_tool_calls else None,
            )

            yield AgentTurnResponseStreamChunk(
                event=AgentTurnResponseEvent(
                    payload=AgentTurnResponseStepCompletePayload(
                        step_type=StepType.inference.value,
                        step_type=StepType.inference,
                        step_id=step_id,
                        step_details=InferenceStep(
                            # somewhere deep, we are re-assigning message or closing over some
@@ -686,13 +708,14 @@ class ChatAgent(ShieldRunnerMixin):
                            turn_id=turn_id,
                            model_response=copy.deepcopy(message),
                            started_at=inference_start_time,
                            completed_at=datetime.now(UTC).isoformat(),
                            completed_at=datetime.now(UTC),
                        ),
                    )
                )
            )

            if n_iter >= self.agent_config.max_infer_iters:
            max_iters = self.agent_config.max_infer_iters if self.agent_config.max_infer_iters is not None else 10
            if n_iter >= max_iters:
                logger.info(f"done with MAX iterations ({n_iter}), exiting.")
                # NOTE: mark end_of_turn to indicate to client that we are done with the turn
                # Do not continue the tool call loop after this point
@@ -705,14 +728,16 @@ class ChatAgent(ShieldRunnerMixin):
                yield message
                break

            if len(message.tool_calls) == 0:
            if not message.tool_calls or len(message.tool_calls) == 0:
                if stop_reason == StopReason.end_of_turn:
                    # TODO: UPDATE RETURN TYPE TO SEND A TUPLE OF (MESSAGE, ATTACHMENTS)
                    if len(output_attachments) > 0:
                        if isinstance(message.content, list):
                            message.content += output_attachments
                            # List invariance - attachments are compatible at runtime
                            message.content += output_attachments  # type: ignore[arg-type]
                        else:
                            message.content = [message.content] + output_attachments
                            # List invariance - attachments are compatible at runtime
                            message.content = [message.content] + output_attachments  # type: ignore[assignment]
                    yield message
                else:
                    logger.debug(f"completion message with EOM (iter: {n_iter}): {str(message)}")
@@ -725,11 +750,12 @@ class ChatAgent(ShieldRunnerMixin):
            non_client_tool_calls = []

            # Separate client and non-client tool calls
            for tool_call in message.tool_calls:
                if tool_call.tool_name in client_tools:
                    client_tool_calls.append(tool_call)
                else:
                    non_client_tool_calls.append(tool_call)
            if message.tool_calls:
                for tool_call in message.tool_calls:
                    if tool_call.tool_name in client_tools:
                        client_tool_calls.append(tool_call)
                    else:
                        non_client_tool_calls.append(tool_call)

            # Process non-client tool calls first
            for tool_call in non_client_tool_calls:
@@ -737,7 +763,7 @@ class ChatAgent(ShieldRunnerMixin):
                yield AgentTurnResponseStreamChunk(
                    event=AgentTurnResponseEvent(
                        payload=AgentTurnResponseStepStartPayload(
                            step_type=StepType.tool_execution.value,
                            step_type=StepType.tool_execution,
                            step_id=step_id,
                        )
                    )
@@ -746,7 +772,7 @@ class ChatAgent(ShieldRunnerMixin):
                yield AgentTurnResponseStreamChunk(
                    event=AgentTurnResponseEvent(
                        payload=AgentTurnResponseStepProgressPayload(
                            step_type=StepType.tool_execution.value,
                            step_type=StepType.tool_execution,
                            step_id=step_id,
                            delta=ToolCallDelta(
                                parse_status=ToolCallParseStatus.in_progress,
@@ -766,7 +792,7 @@ class ChatAgent(ShieldRunnerMixin):
                    if self.telemetry_enabled
                    else {},
                ) as span:
                    tool_execution_start_time = datetime.now(UTC).isoformat()
                    tool_execution_start_time = datetime.now(UTC)
                    tool_result = await self.execute_tool_call_maybe(
                        session_id,
                        tool_call,
@@ -796,14 +822,14 @@ class ChatAgent(ShieldRunnerMixin):
                        )
                    ],
                    started_at=tool_execution_start_time,
                    completed_at=datetime.now(UTC).isoformat(),
                    completed_at=datetime.now(UTC),
                )

                # Yield the step completion event
                yield AgentTurnResponseStreamChunk(
                    event=AgentTurnResponseEvent(
                        payload=AgentTurnResponseStepCompletePayload(
                            step_type=StepType.tool_execution.value,
                            step_type=StepType.tool_execution,
                            step_id=step_id,
                            step_details=tool_execution_step,
                        )
@@ -833,7 +859,7 @@ class ChatAgent(ShieldRunnerMixin):
                    turn_id=turn_id,
                    tool_calls=client_tool_calls,
                    tool_responses=[],
                    started_at=datetime.now(UTC).isoformat(),
                    started_at=datetime.now(UTC),
                ),
            )

@@ -868,19 +894,20 @@ class ChatAgent(ShieldRunnerMixin):

        toolgroup_to_args = toolgroup_to_args or {}

        tool_name_to_def = {}
        tool_name_to_def: dict[str, ToolDefinition] = {}
        tool_name_to_args = {}

        for tool_def in self.agent_config.client_tools:
            if tool_name_to_def.get(tool_def.name, None):
                raise ValueError(f"Tool {tool_def.name} already exists")
        if self.agent_config.client_tools:
            for tool_def in self.agent_config.client_tools:
                if tool_name_to_def.get(tool_def.name, None):
                    raise ValueError(f"Tool {tool_def.name} already exists")

            # Use input_schema from ToolDef directly
            tool_name_to_def[tool_def.name] = ToolDefinition(
                tool_name=tool_def.name,
                description=tool_def.description,
                input_schema=tool_def.input_schema,
            )
                # Use input_schema from ToolDef directly
                tool_name_to_def[tool_def.name] = ToolDefinition(
                    tool_name=tool_def.name,
                    description=tool_def.description,
                    input_schema=tool_def.input_schema,
                )
        for toolgroup_name_with_maybe_tool_name in agent_config_toolgroups:
            toolgroup_name, input_tool_name = self._parse_toolgroup_name(toolgroup_name_with_maybe_tool_name)
            tools = await self.tool_groups_api.list_tools(toolgroup_id=toolgroup_name)
@@ -908,15 +935,17 @@ class ChatAgent(ShieldRunnerMixin):
            else:
                identifier = None

            if tool_name_to_def.get(identifier, None):
                raise ValueError(f"Tool {identifier} already exists")
            if identifier:
                tool_name_to_def[identifier] = ToolDefinition(
                    tool_name=identifier,
                # Convert BuiltinTool to string for dictionary key
                identifier_str = identifier.value if isinstance(identifier, BuiltinTool) else identifier
                if tool_name_to_def.get(identifier_str, None):
                    raise ValueError(f"Tool {identifier_str} already exists")
                tool_name_to_def[identifier_str] = ToolDefinition(
                    tool_name=identifier_str,
                    description=tool_def.description,
                    input_schema=tool_def.input_schema,
                )
            tool_name_to_args[identifier] = toolgroup_to_args.get(toolgroup_name, {})
                tool_name_to_args[identifier_str] = toolgroup_to_args.get(toolgroup_name, {})

        self.tool_defs, self.tool_name_to_args = (
            list(tool_name_to_def.values()),
@@ -966,14 +995,17 @@ class ChatAgent(ShieldRunnerMixin):
        except json.JSONDecodeError as e:
            raise ValueError(f"Failed to parse arguments for tool call: {tool_call.arguments}") from e

        result = await self.tool_runtime_api.invoke_tool(
            tool_name=tool_name_str,
            kwargs={
                "session_id": session_id,
                # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
                **args,
                **self.tool_name_to_args.get(tool_name_str, {}),
            },
        result = cast(
            ToolInvocationResult,
            await self.tool_runtime_api.invoke_tool(
                tool_name=tool_name_str,
                kwargs={
                    "session_id": session_id,
                    # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
                    **args,
                    **self.tool_name_to_args.get(tool_name_str, {}),
                },
            ),
        )
        logger.debug(f"tool call {tool_name_str} completed with result: {result}")
        return result
@@ -1017,7 +1049,7 @@ def _interpret_content_as_attachment(
        snippet = match.group(1)
        data = json.loads(snippet)
        return Attachment(
            url=URL(uri="file://" + data["filepath"]),
            content=URL(uri="file://" + data["filepath"]),
            mime_type=data["mimetype"],
        )

@@ -21,6 +21,7 @@ from llama_stack.apis.agents import (
    Document,
    ListOpenAIResponseInputItem,
    ListOpenAIResponseObject,
    OpenAIDeleteResponseObject,
    OpenAIResponseInput,
    OpenAIResponseInputTool,
    OpenAIResponseObject,
@@ -141,7 +142,7 @@ class MetaReferenceAgentsImpl(Agents):
            persistence_store=(
                self.persistence_store if agent_info.enable_session_persistence else self.in_memory_store
            ),
            created_at=agent_info.created_at,
            created_at=agent_info.created_at.isoformat(),
            policy=self.policy,
            telemetry_enabled=self.telemetry_enabled,
        )
@@ -163,9 +164,9 @@ class MetaReferenceAgentsImpl(Agents):
        agent_id: str,
        session_id: str,
        messages: list[UserMessage | ToolResponseMessage],
        toolgroups: list[AgentToolGroup] | None = None,
        documents: list[Document] | None = None,
        stream: bool | None = False,
        documents: list[Document] | None = None,
        toolgroups: list[AgentToolGroup] | None = None,
        tool_config: ToolConfig | None = None,
    ) -> AsyncGenerator:
        request = AgentTurnCreateRequest(
@@ -221,6 +222,8 @@ class MetaReferenceAgentsImpl(Agents):
    async def get_agents_turn(self, agent_id: str, session_id: str, turn_id: str) -> Turn:
        agent = await self._get_agent_impl(agent_id)
        turn = await agent.storage.get_session_turn(session_id, turn_id)
        if turn is None:
            raise ValueError(f"Turn {turn_id} not found in session {session_id}")
        return turn

    async def get_agents_step(self, agent_id: str, session_id: str, turn_id: str, step_id: str) -> AgentStepResponse:
@@ -232,13 +235,15 @@ class MetaReferenceAgentsImpl(Agents):

    async def get_agents_session(
        self,
        agent_id: str,
        session_id: str,
        agent_id: str,
        turn_ids: list[str] | None = None,
    ) -> Session:
        agent = await self._get_agent_impl(agent_id)

        session_info = await agent.storage.get_session_info(session_id)
        if session_info is None:
            raise ValueError(f"Session {session_id} not found")
        turns = await agent.storage.get_session_turns(session_id)
        if turn_ids:
            turns = [turn for turn in turns if turn.turn_id in turn_ids]
@@ -249,7 +254,7 @@ class MetaReferenceAgentsImpl(Agents):
            started_at=session_info.started_at,
        )

    async def delete_agents_session(self, agent_id: str, session_id: str) -> None:
    async def delete_agents_session(self, session_id: str, agent_id: str) -> None:
        agent = await self._get_agent_impl(agent_id)

        # Delete turns first, then the session
@@ -302,7 +307,7 @@ class MetaReferenceAgentsImpl(Agents):
        agent = Agent(
            agent_id=agent_id,
            agent_config=chat_agent.agent_config,
            created_at=chat_agent.created_at,
            created_at=datetime.fromisoformat(chat_agent.created_at),
        )
        return agent

@@ -323,6 +328,7 @@ class MetaReferenceAgentsImpl(Agents):
        self,
        response_id: str,
    ) -> OpenAIResponseObject:
        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
        return await self.openai_responses_impl.get_openai_response(response_id)

    async def create_openai_response(
@@ -342,7 +348,8 @@ class MetaReferenceAgentsImpl(Agents):
        max_infer_iters: int | None = 10,
        guardrails: list[ResponseGuardrail] | None = None,
    ) -> OpenAIResponseObject:
        return await self.openai_responses_impl.create_openai_response(
        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
        result = await self.openai_responses_impl.create_openai_response(
            input,
            model,
            prompt,
@@ -358,6 +365,7 @@ class MetaReferenceAgentsImpl(Agents):
            max_infer_iters,
            guardrails,
        )
        return result  # type: ignore[no-any-return]

    async def list_openai_responses(
        self,
@@ -366,6 +374,7 @@ class MetaReferenceAgentsImpl(Agents):
        model: str | None = None,
        order: Order | None = Order.desc,
    ) -> ListOpenAIResponseObject:
        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
        return await self.openai_responses_impl.list_openai_responses(after, limit, model, order)

    async def list_openai_response_input_items(
@@ -377,9 +386,11 @@ class MetaReferenceAgentsImpl(Agents):
        limit: int | None = 20,
        order: Order | None = Order.desc,
    ) -> ListOpenAIResponseInputItem:
        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
        return await self.openai_responses_impl.list_openai_response_input_items(
            response_id, after, before, include, limit, order
        )

    async def delete_openai_response(self, response_id: str) -> None:
    async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
        return await self.openai_responses_impl.delete_openai_response(response_id)

@@ -6,12 +6,14 @@

import json
import uuid
from dataclasses import dataclass
from datetime import UTC, datetime

from llama_stack.apis.agents import AgentConfig, Session, ToolExecutionStep, Turn
from llama_stack.apis.common.errors import SessionNotFoundError
from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
from llama_stack.core.access_control.datatypes import AccessRule
from llama_stack.core.access_control.conditions import User as ProtocolUser
from llama_stack.core.access_control.datatypes import AccessRule, Action
from llama_stack.core.datatypes import User
from llama_stack.core.request_headers import get_authenticated_user
from llama_stack.log import get_logger
@@ -33,6 +35,15 @@ class AgentInfo(AgentConfig):
    created_at: datetime


@dataclass
class SessionResource:
    """Concrete implementation of ProtectedResource for session access control."""

    type: str
    identifier: str
    owner: ProtocolUser  # Use the protocol type for structural compatibility


class AgentPersistence:
    def __init__(self, agent_id: str, kvstore: KVStore, policy: list[AccessRule]):
        self.agent_id = agent_id
@@ -53,8 +64,15 @@ class AgentPersistence:
            turns=[],
            identifier=name,  # should this be qualified in any way?
        )
        if not is_action_allowed(self.policy, "create", session_info, user):
            raise AccessDeniedError("create", session_info, user)
        # Only perform access control if we have an authenticated user
        if user is not None and session_info.identifier is not None:
            resource = SessionResource(
                type=session_info.type,
                identifier=session_info.identifier,
                owner=user,
            )
            if not is_action_allowed(self.policy, Action.CREATE, resource, user):
                raise AccessDeniedError(Action.CREATE, resource, user)

        await self.kvstore.set(
            key=f"session:{self.agent_id}:{session_id}",
@@ -62,7 +80,7 @@ class AgentPersistence:
        )
        return session_id

    async def get_session_info(self, session_id: str) -> AgentSessionInfo:
    async def get_session_info(self, session_id: str) -> AgentSessionInfo | None:
        value = await self.kvstore.get(
            key=f"session:{self.agent_id}:{session_id}",
        )
@@ -83,7 +101,22 @@ class AgentPersistence:
        if not hasattr(session_info, "access_attributes") and not hasattr(session_info, "owner"):
            return True

        return is_action_allowed(self.policy, "read", session_info, get_authenticated_user())
        # Get current user - if None, skip access control (e.g., in tests)
        user = get_authenticated_user()
        if user is None:
            return True

        # Access control requires identifier and owner to be set
        if session_info.identifier is None or session_info.owner is None:
            return True

        # At this point, both identifier and owner are guaranteed to be non-None
        resource = SessionResource(
            type=session_info.type,
            identifier=session_info.identifier,
            owner=session_info.owner,
        )
        return is_action_allowed(self.policy, Action.READ, resource, user)

    async def get_session_if_accessible(self, session_id: str) -> AgentSessionInfo | None:
        """Get session info if the user has access to it. For internal use by sub-session methods."""
@@ -91,7 +91,8 @@ class OpenAIResponsesImpl:
        input: str | list[OpenAIResponseInput],
        previous_response: _OpenAIResponseObjectWithInputAndMessages,
    ):
        new_input_items = previous_response.input.copy()
        # Convert Sequence to list for mutation
        new_input_items = list(previous_response.input)
        new_input_items.extend(previous_response.output)

        if isinstance(input, str):

@@ -107,7 +108,7 @@ class OpenAIResponsesImpl:
        tools: list[OpenAIResponseInputTool] | None,
        previous_response_id: str | None,
        conversation: str | None,
    ) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam]]:
    ) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam], ToolContext]:
        """Process input with optional previous response context.

        Returns:

@@ -208,6 +209,9 @@ class OpenAIResponsesImpl:
        messages: list[OpenAIMessageParam],
    ) -> None:
        new_input_id = f"msg_{uuid.uuid4()}"
        # Type input_items_data as the full OpenAIResponseInput union to avoid list invariance issues
        input_items_data: list[OpenAIResponseInput] = []

        if isinstance(input, str):
            # synthesize a message from the input string
            input_content = OpenAIResponseInputMessageContentText(text=input)

@@ -219,7 +223,6 @@ class OpenAIResponsesImpl:
            input_items_data = [input_content_item]
        else:
            # we already have a list of messages
            input_items_data = []
            for input_item in input:
                if isinstance(input_item, OpenAIResponseMessage):
                    # These may or may not already have an id, so dump to dict, check for id, and add if missing

@@ -251,7 +254,7 @@ class OpenAIResponsesImpl:
        tools: list[OpenAIResponseInputTool] | None = None,
        include: list[str] | None = None,
        max_infer_iters: int | None = 10,
        guardrails: list[ResponseGuardrailSpec] | None = None,
        guardrails: list[str | ResponseGuardrailSpec] | None = None,
    ):
        stream = bool(stream)
        text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text

@@ -289,16 +292,19 @@ class OpenAIResponsesImpl:
        failed_response = None

        async for stream_chunk in stream_gen:
            if stream_chunk.type in {"response.completed", "response.incomplete"}:
                if final_response is not None:
                    raise ValueError(
                        "The response stream produced multiple terminal responses! "
                        f"Earlier response from {final_event_type}"
                    )
                final_response = stream_chunk.response
                final_event_type = stream_chunk.type
            elif stream_chunk.type == "response.failed":
                failed_response = stream_chunk.response
            match stream_chunk.type:
                case "response.completed" | "response.incomplete":
                    if final_response is not None:
                        raise ValueError(
                            "The response stream produced multiple terminal responses! "
                            f"Earlier response from {final_event_type}"
                        )
                    final_response = stream_chunk.response
                    final_event_type = stream_chunk.type
                case "response.failed":
                    failed_response = stream_chunk.response
                case _:
                    pass  # Other event types don't have .response

        if failed_response is not None:
            error_message = (

@@ -326,6 +332,11 @@ class OpenAIResponsesImpl:
        max_infer_iters: int | None = 10,
        guardrail_ids: list[str] | None = None,
    ) -> AsyncIterator[OpenAIResponseObjectStream]:
        # These should never be None when called from create_openai_response (which sets defaults)
        # but we assert here to help mypy understand the types
        assert text is not None, "text must not be None"
        assert max_infer_iters is not None, "max_infer_iters must not be None"

        # Input preprocessing
        all_input, messages, tool_context = await self._process_input_with_previous_response(
            input, tools, previous_response_id, conversation

@@ -368,16 +379,19 @@ class OpenAIResponsesImpl:
        final_response = None
        failed_response = None

        output_items = []
        # Type as ConversationItem to avoid list invariance issues
        output_items: list[ConversationItem] = []
        async for stream_chunk in orchestrator.create_response():
            if stream_chunk.type in {"response.completed", "response.incomplete"}:
                final_response = stream_chunk.response
            elif stream_chunk.type == "response.failed":
                failed_response = stream_chunk.response

            if stream_chunk.type == "response.output_item.done":
                item = stream_chunk.item
                output_items.append(item)
            match stream_chunk.type:
                case "response.completed" | "response.incomplete":
                    final_response = stream_chunk.response
                case "response.failed":
                    failed_response = stream_chunk.response
                case "response.output_item.done":
                    item = stream_chunk.item
                    output_items.append(item)
                case _:
                    pass  # Other event types

        # Store and sync before yielding terminal events
        # This ensures the storage/syncing happens even if the consumer breaks after receiving the event

@@ -410,7 +424,8 @@ class OpenAIResponsesImpl:
        self, conversation_id: str, input: str | list[OpenAIResponseInput] | None, output_items: list[ConversationItem]
    ) -> None:
        """Sync content and response messages to the conversation."""
        conversation_items = []
        # Type as ConversationItem union to avoid list invariance issues
        conversation_items: list[ConversationItem] = []

        if isinstance(input, str):
            conversation_items.append(
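The `match` refactor above makes the terminal-event scan exhaustive while keeping mypy happy about union members that lack a `.response` attribute. A self-contained sketch of the same pattern over a toy async stream (the event names mirror the diff; the `Event` dataclass is a stand-in, not the stack's chunk type):

import asyncio
from dataclasses import dataclass


@dataclass
class Event:
    type: str
    payload: str | None = None


async def toy_stream():
    for e in [
        Event("response.created"),
        Event("response.output_item.done", "item"),
        Event("response.completed", "final"),
    ]:
        yield e


async def scan() -> str | None:
    final, failed = None, None
    async for chunk in toy_stream():
        match chunk.type:
            case "response.completed" | "response.incomplete":
                if final is not None:
                    raise ValueError("multiple terminal responses")
                final = chunk.payload
            case "response.failed":
                failed = chunk.payload
            case _:
                pass  # non-terminal events carry no final payload
    return failed or final


print(asyncio.run(scan()))  # -> "final"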
@@ -111,7 +111,7 @@ class StreamingResponseOrchestrator:
        text: OpenAIResponseText,
        max_infer_iters: int,
        tool_executor,  # Will be the tool execution logic from the main class
        instructions: str,
        instructions: str | None,
        safety_api,
        guardrail_ids: list[str] | None = None,
        prompt: OpenAIResponsePrompt | None = None,

@@ -128,7 +128,9 @@ class StreamingResponseOrchestrator:
        self.prompt = prompt
        self.sequence_number = 0
        # Store MCP tool mapping that gets built during tool processing
        self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = ctx.tool_context.previous_tools or {}
        self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
            ctx.tool_context.previous_tools if ctx.tool_context else {}
        )
        # Track final messages after all tool executions
        self.final_messages: list[OpenAIMessageParam] = []
        # mapping for annotations

@@ -229,7 +231,8 @@ class StreamingResponseOrchestrator:
        params = OpenAIChatCompletionRequestWithExtraBody(
            model=self.ctx.model,
            messages=messages,
            tools=self.ctx.chat_tools,
            # Pydantic models are dict-compatible but mypy treats them as distinct types
            tools=self.ctx.chat_tools,  # type: ignore[arg-type]
            stream=True,
            temperature=self.ctx.temperature,
            response_format=response_format,

@@ -272,7 +275,12 @@ class StreamingResponseOrchestrator:

        # Handle choices with no tool calls
        for choice in current_response.choices:
            if not (choice.message.tool_calls and self.ctx.response_tools):
            has_tool_calls = (
                isinstance(choice.message, OpenAIAssistantMessageParam)
                and choice.message.tool_calls
                and self.ctx.response_tools
            )
            if not has_tool_calls:
                output_messages.append(
                    await convert_chat_choice_to_response_message(
                        choice,

@@ -722,7 +730,10 @@ class StreamingResponseOrchestrator:
                )

            # Accumulate arguments for final response (only for subsequent chunks)
            if not is_new_tool_call:
            if not is_new_tool_call and response_tool_call is not None:
                # Both should have functions since we're inside the tool_call.function check above
                assert response_tool_call.function is not None
                assert tool_call.function is not None
                response_tool_call.function.arguments = (
                    response_tool_call.function.arguments or ""
                ) + tool_call.function.arguments

@@ -747,10 +758,13 @@ class StreamingResponseOrchestrator:
        for tool_call_index in sorted(chat_response_tool_calls.keys()):
            tool_call = chat_response_tool_calls[tool_call_index]
            # Ensure that arguments, if sent back to the inference provider, are not None
            tool_call.function.arguments = tool_call.function.arguments or "{}"
            if tool_call.function:
                tool_call.function.arguments = tool_call.function.arguments or "{}"
            tool_call_item_id = tool_call_item_ids[tool_call_index]
            final_arguments = tool_call.function.arguments
            tool_call_name = chat_response_tool_calls[tool_call_index].function.name
            final_arguments: str = tool_call.function.arguments or "{}" if tool_call.function else "{}"
            func = chat_response_tool_calls[tool_call_index].function

            tool_call_name = func.name if func else ""

            # Check if this is an MCP tool call
            is_mcp_tool = tool_call_name and tool_call_name in self.mcp_tool_to_server

@@ -894,12 +908,11 @@ class StreamingResponseOrchestrator:

        self.sequence_number += 1
        if tool_call.function.name and tool_call.function.name in self.mcp_tool_to_server:
            item = OpenAIResponseOutputMessageMCPCall(
            item: OpenAIResponseOutput = OpenAIResponseOutputMessageMCPCall(
                arguments="",
                name=tool_call.function.name,
                id=matching_item_id,
                server_label=self.mcp_tool_to_server[tool_call.function.name].server_label,
                status="in_progress",
            )
        elif tool_call.function.name == "web_search":
            item = OpenAIResponseOutputMessageWebSearchToolCall(

@@ -1008,7 +1021,7 @@ class StreamingResponseOrchestrator:
                description=tool.description,
                input_schema=tool.input_schema,
            )
            return convert_tooldef_to_openai_tool(tool_def)
            return convert_tooldef_to_openai_tool(tool_def)  # type: ignore[return-value] # Returns dict but ChatCompletionToolParam expects TypedDict

        # Initialize chat_tools if not already set
        if self.ctx.chat_tools is None:

@@ -1016,7 +1029,7 @@ class StreamingResponseOrchestrator:

        for input_tool in tools:
            if input_tool.type == "function":
                self.ctx.chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
                self.ctx.chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))  # type: ignore[typeddict-item,arg-type] # Dict compatible with FunctionDefinition
            elif input_tool.type in WebSearchToolTypes:
                tool_name = "web_search"
                # Need to access tool_groups_api from tool_executor

@@ -1055,8 +1068,8 @@ class StreamingResponseOrchestrator:
            if isinstance(mcp_tool.allowed_tools, list):
                always_allowed = mcp_tool.allowed_tools
            elif isinstance(mcp_tool.allowed_tools, AllowedToolsFilter):
                always_allowed = mcp_tool.allowed_tools.always
                never_allowed = mcp_tool.allowed_tools.never
                # AllowedToolsFilter only has tool_names field (not allowed/disallowed)
                always_allowed = mcp_tool.allowed_tools.tool_names

            # Call list_mcp_tools
            tool_defs = None

@@ -1088,7 +1101,7 @@ class StreamingResponseOrchestrator:
                openai_tool = convert_tooldef_to_chat_tool(t)
                if self.ctx.chat_tools is None:
                    self.ctx.chat_tools = []
                self.ctx.chat_tools.append(openai_tool)
                self.ctx.chat_tools.append(openai_tool)  # type: ignore[arg-type] # Returns dict but ChatCompletionToolParam expects TypedDict

                # Add to MCP tool mapping
                if t.name in self.mcp_tool_to_server:

@@ -1120,13 +1133,17 @@ class StreamingResponseOrchestrator:
        self, output_messages: list[OpenAIResponseOutput]
    ) -> AsyncIterator[OpenAIResponseObjectStream]:
        # Handle all mcp tool lists from previous response that are still valid:
        for tool in self.ctx.tool_context.previous_tool_listings:
            async for evt in self._reuse_mcp_list_tools(tool, output_messages):
                yield evt
        # Process all remaining tools (including MCP tools) and emit streaming events
        if self.ctx.tool_context.tools_to_process:
            async for stream_event in self._process_new_tools(self.ctx.tool_context.tools_to_process, output_messages):
                yield stream_event
        # tool_context can be None when no tools are provided in the response request
        if self.ctx.tool_context:
            for tool in self.ctx.tool_context.previous_tool_listings:
                async for evt in self._reuse_mcp_list_tools(tool, output_messages):
                    yield evt
            # Process all remaining tools (including MCP tools) and emit streaming events
            if self.ctx.tool_context.tools_to_process:
                async for stream_event in self._process_new_tools(
                    self.ctx.tool_context.tools_to_process, output_messages
                ):
                    yield stream_event

    def _approval_required(self, tool_name: str) -> bool:
        if tool_name not in self.mcp_tool_to_server:

@@ -1220,7 +1237,7 @@ class StreamingResponseOrchestrator:
            openai_tool = convert_tooldef_to_openai_tool(tool_def)
            if self.ctx.chat_tools is None:
                self.ctx.chat_tools = []
            self.ctx.chat_tools.append(openai_tool)
            self.ctx.chat_tools.append(openai_tool)  # type: ignore[arg-type] # Returns dict but ChatCompletionToolParam expects TypedDict

        mcp_list_message = OpenAIResponseOutputMessageMCPListTools(
            id=f"mcp_list_{uuid.uuid4()}",
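The guarded accumulation above (`if not is_new_tool_call and response_tool_call is not None`) is the usual way to stitch streamed tool-call argument deltas back into a complete JSON string. A toy sketch of that accumulation, with plain dataclasses standing in for the OpenAI chunk types:

from dataclasses import dataclass


@dataclass
class FunctionDelta:
    name: str | None = None
    arguments: str = ""


@dataclass
class ToolCallDelta:
    index: int
    function: FunctionDelta | None = None


def accumulate(deltas: list[ToolCallDelta]) -> dict[int, FunctionDelta]:
    calls: dict[int, FunctionDelta] = {}
    for delta in deltas:
        if delta.function is None:
            continue
        is_new = delta.index not in calls
        if is_new:
            # First chunk for this index carries the function name
            calls[delta.index] = FunctionDelta(name=delta.function.name)
        existing = calls.get(delta.index)
        # Append arguments only for subsequent chunks, and only when the slot exists
        if not is_new and existing is not None:
            existing.arguments = (existing.arguments or "") + delta.function.arguments
    return calls


chunks = [
    ToolCallDelta(0, FunctionDelta(name="get_weather")),
    ToolCallDelta(0, FunctionDelta(arguments='{"city": ')),
    ToolCallDelta(0, FunctionDelta(arguments='"Paris"}')),
]
print(accumulate(chunks)[0].arguments)  # {"city": "Paris"}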
@@ -7,6 +7,7 @@
import asyncio
import json
from collections.abc import AsyncIterator
from typing import Any

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseInputToolFileSearch,

@@ -22,6 +23,7 @@ from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseObjectStreamResponseWebSearchCallSearching,
    OpenAIResponseOutputMessageFileSearchToolCall,
    OpenAIResponseOutputMessageFileSearchToolCallResults,
    OpenAIResponseOutputMessageMCPCall,
    OpenAIResponseOutputMessageWebSearchToolCall,
)
from llama_stack.apis.common.content_types import (

@@ -67,7 +69,7 @@ class ToolExecutor:
    ) -> AsyncIterator[ToolExecutionResult]:
        tool_call_id = tool_call.id
        function = tool_call.function
        tool_kwargs = json.loads(function.arguments) if function.arguments else {}
        tool_kwargs = json.loads(function.arguments) if function and function.arguments else {}

        if not function or not tool_call_id or not function.name:
            yield ToolExecutionResult(sequence_number=sequence_number)

@@ -84,7 +86,16 @@ class ToolExecutor:
        error_exc, result = await self._execute_tool(function.name, tool_kwargs, ctx, mcp_tool_to_server)

        # Emit completion events for tool execution
        has_error = error_exc or (result and ((result.error_code and result.error_code > 0) or result.error_message))
        has_error = bool(
            error_exc
            or (
                result
                and (
                    ((error_code := getattr(result, "error_code", None)) and error_code > 0)
                    or getattr(result, "error_message", None)
                )
            )
        )
        async for event_result in self._emit_completion_events(
            function.name, ctx, sequence_number, output_index, item_id, has_error, mcp_tool_to_server
        ):

@@ -101,7 +112,9 @@ class ToolExecutor:
            sequence_number=sequence_number,
            final_output_message=output_message,
            final_input_message=input_message,
            citation_files=result.metadata.get("citation_files") if result and result.metadata else None,
            citation_files=(
                metadata.get("citation_files") if result and (metadata := getattr(result, "metadata", None)) else None
            ),
        )

    async def _execute_knowledge_search_via_vector_store(

@@ -188,8 +201,9 @@ class ToolExecutor:

            citation_files[file_id] = filename

        # Cast to proper InterleavedContent type (list invariance)
        return ToolInvocationResult(
            content=content_items,
            content=content_items,  # type: ignore[arg-type]
            metadata={
                "document_ids": [r.file_id for r in search_results],
                "chunks": [r.content[0].text if r.content else "" for r in search_results],

@@ -209,51 +223,60 @@ class ToolExecutor:
    ) -> AsyncIterator[ToolExecutionResult]:
        """Emit progress events for tool execution start."""
        # Emit in_progress event based on tool type (only for tools with specific streaming events)
        progress_event = None
        if mcp_tool_to_server and function_name in mcp_tool_to_server:
            sequence_number += 1
            progress_event = OpenAIResponseObjectStreamResponseMcpCallInProgress(
                item_id=item_id,
                output_index=output_index,
            yield ToolExecutionResult(
                stream_event=OpenAIResponseObjectStreamResponseMcpCallInProgress(
                    item_id=item_id,
                    output_index=output_index,
                    sequence_number=sequence_number,
                ),
                sequence_number=sequence_number,
            )
        elif function_name == "web_search":
            sequence_number += 1
            progress_event = OpenAIResponseObjectStreamResponseWebSearchCallInProgress(
                item_id=item_id,
                output_index=output_index,
            yield ToolExecutionResult(
                stream_event=OpenAIResponseObjectStreamResponseWebSearchCallInProgress(
                    item_id=item_id,
                    output_index=output_index,
                    sequence_number=sequence_number,
                ),
                sequence_number=sequence_number,
            )
        elif function_name == "knowledge_search":
            sequence_number += 1
            progress_event = OpenAIResponseObjectStreamResponseFileSearchCallInProgress(
                item_id=item_id,
                output_index=output_index,
            yield ToolExecutionResult(
                stream_event=OpenAIResponseObjectStreamResponseFileSearchCallInProgress(
                    item_id=item_id,
                    output_index=output_index,
                    sequence_number=sequence_number,
                ),
                sequence_number=sequence_number,
            )

        if progress_event:
            yield ToolExecutionResult(stream_event=progress_event, sequence_number=sequence_number)

        # For web search, emit searching event
        if function_name == "web_search":
            sequence_number += 1
            searching_event = OpenAIResponseObjectStreamResponseWebSearchCallSearching(
                item_id=item_id,
                output_index=output_index,
            yield ToolExecutionResult(
                stream_event=OpenAIResponseObjectStreamResponseWebSearchCallSearching(
                    item_id=item_id,
                    output_index=output_index,
                    sequence_number=sequence_number,
                ),
                sequence_number=sequence_number,
            )
            yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number)

        # For file search, emit searching event
        if function_name == "knowledge_search":
            sequence_number += 1
            searching_event = OpenAIResponseObjectStreamResponseFileSearchCallSearching(
                item_id=item_id,
                output_index=output_index,
            yield ToolExecutionResult(
                stream_event=OpenAIResponseObjectStreamResponseFileSearchCallSearching(
                    item_id=item_id,
                    output_index=output_index,
                    sequence_number=sequence_number,
                ),
                sequence_number=sequence_number,
            )
            yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number)

    async def _execute_tool(
        self,

@@ -261,7 +284,7 @@ class ToolExecutor:
        tool_kwargs: dict,
        ctx: ChatCompletionContext,
        mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
    ) -> tuple[Exception | None, any]:
    ) -> tuple[Exception | None, Any]:
        """Execute the tool and return error exception and result."""
        error_exc = None
        result = None

@@ -284,9 +307,13 @@ class ToolExecutor:
                kwargs=tool_kwargs,
            )
        elif function_name == "knowledge_search":
            response_file_search_tool = next(
                (t for t in ctx.response_tools if isinstance(t, OpenAIResponseInputToolFileSearch)),
                None,
            response_file_search_tool = (
                next(
                    (t for t in ctx.response_tools if isinstance(t, OpenAIResponseInputToolFileSearch)),
                    None,
                )
                if ctx.response_tools
                else None
            )
            if response_file_search_tool:
                # Use vector_stores.search API instead of knowledge_search tool

@@ -322,35 +349,34 @@ class ToolExecutor:
        mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
    ) -> AsyncIterator[ToolExecutionResult]:
        """Emit completion or failure events for tool execution."""
        completion_event = None

        if mcp_tool_to_server and function_name in mcp_tool_to_server:
            sequence_number += 1
            if has_error:
                completion_event = OpenAIResponseObjectStreamResponseMcpCallFailed(
                mcp_failed_event = OpenAIResponseObjectStreamResponseMcpCallFailed(
                    sequence_number=sequence_number,
                )
                yield ToolExecutionResult(stream_event=mcp_failed_event, sequence_number=sequence_number)
            else:
                completion_event = OpenAIResponseObjectStreamResponseMcpCallCompleted(
                mcp_completed_event = OpenAIResponseObjectStreamResponseMcpCallCompleted(
                    sequence_number=sequence_number,
                )
                yield ToolExecutionResult(stream_event=mcp_completed_event, sequence_number=sequence_number)
        elif function_name == "web_search":
            sequence_number += 1
            completion_event = OpenAIResponseObjectStreamResponseWebSearchCallCompleted(
            web_completion_event = OpenAIResponseObjectStreamResponseWebSearchCallCompleted(
                item_id=item_id,
                output_index=output_index,
                sequence_number=sequence_number,
            )
            yield ToolExecutionResult(stream_event=web_completion_event, sequence_number=sequence_number)
        elif function_name == "knowledge_search":
            sequence_number += 1
            completion_event = OpenAIResponseObjectStreamResponseFileSearchCallCompleted(
            file_completion_event = OpenAIResponseObjectStreamResponseFileSearchCallCompleted(
                item_id=item_id,
                output_index=output_index,
                sequence_number=sequence_number,
            )

        if completion_event:
            yield ToolExecutionResult(stream_event=completion_event, sequence_number=sequence_number)
            yield ToolExecutionResult(stream_event=file_completion_event, sequence_number=sequence_number)

    async def _build_result_messages(
        self,

@@ -360,21 +386,18 @@ class ToolExecutor:
        tool_kwargs: dict,
        ctx: ChatCompletionContext,
        error_exc: Exception | None,
        result: any,
        result: Any,
        has_error: bool,
        mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
    ) -> tuple[any, any]:
    ) -> tuple[Any, Any]:
        """Build output and input messages from tool execution results."""
        from llama_stack.providers.utils.inference.prompt_adapter import (
            interleaved_content_as_str,
        )

        # Build output message
        message: Any
        if mcp_tool_to_server and function.name in mcp_tool_to_server:
            from llama_stack.apis.agents.openai_responses import (
                OpenAIResponseOutputMessageMCPCall,
            )

            message = OpenAIResponseOutputMessageMCPCall(
                id=item_id,
                arguments=function.arguments,

@@ -383,10 +406,14 @@ class ToolExecutor:
            )
            if error_exc:
                message.error = str(error_exc)
            elif (result and result.error_code and result.error_code > 0) or (result and result.error_message):
                message.error = f"Error (code {result.error_code}): {result.error_message}"
            elif result and result.content:
                message.output = interleaved_content_as_str(result.content)
            elif (result and (error_code := getattr(result, "error_code", None)) and error_code > 0) or (
                result and getattr(result, "error_message", None)
            ):
                ec = getattr(result, "error_code", "unknown")
                em = getattr(result, "error_message", "")
                message.error = f"Error (code {ec}): {em}"
            elif result and (content := getattr(result, "content", None)):
                message.output = interleaved_content_as_str(content)
        else:
            if function.name == "web_search":
                message = OpenAIResponseOutputMessageWebSearchToolCall(

@@ -401,17 +428,17 @@ class ToolExecutor:
                    queries=[tool_kwargs.get("query", "")],
                    status="completed",
                )
                if result and "document_ids" in result.metadata:
                if result and (metadata := getattr(result, "metadata", None)) and "document_ids" in metadata:
                    message.results = []
                    for i, doc_id in enumerate(result.metadata["document_ids"]):
                        text = result.metadata["chunks"][i] if "chunks" in result.metadata else None
                        score = result.metadata["scores"][i] if "scores" in result.metadata else None
                    for i, doc_id in enumerate(metadata["document_ids"]):
                        text = metadata["chunks"][i] if "chunks" in metadata else None
                        score = metadata["scores"][i] if "scores" in metadata else None
                        message.results.append(
                            OpenAIResponseOutputMessageFileSearchToolCallResults(
                                file_id=doc_id,
                                filename=doc_id,
                                text=text,
                                score=score,
                                text=text if text is not None else "",
                                score=score if score is not None else 0.0,
                                attributes={},
                            )
                        )

@@ -421,27 +448,32 @@ class ToolExecutor:
                raise ValueError(f"Unknown tool {function.name} called")

        # Build input message
        input_message = None
        if result and result.content:
            if isinstance(result.content, str):
                content = result.content
            elif isinstance(result.content, list):
                content = []
                for item in result.content:
        input_message: OpenAIToolMessageParam | None = None
        if result and (result_content := getattr(result, "content", None)):
            # all the mypy contortions here are still unsatisfactory with random Any typing
            if isinstance(result_content, str):
                msg_content: str | list[Any] = result_content
            elif isinstance(result_content, list):
                content_list: list[Any] = []
                for item in result_content:
                    part: Any
                    if isinstance(item, TextContentItem):
                        part = OpenAIChatCompletionContentPartTextParam(text=item.text)
                    elif isinstance(item, ImageContentItem):
                        if item.image.data:
                            url = f"data:image;base64,{item.image.data}"
                            url_value = f"data:image;base64,{item.image.data}"
                        else:
                            url = item.image.url
                            part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url))
                            url_value = str(item.image.url) if item.image.url else ""
                        part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url_value))
                    else:
                        raise ValueError(f"Unknown result content type: {type(item)}")
                    content.append(part)
                    content_list.append(part)
                msg_content = content_list
            else:
                raise ValueError(f"Unknown result content type: {type(result.content)}")
            input_message = OpenAIToolMessageParam(content=content, tool_call_id=tool_call_id)
                raise ValueError(f"Unknown result content type: {type(result_content)}")
            # OpenAIToolMessageParam accepts str | list[TextParam] but we may have images
            # This is runtime-safe as the API accepts it, but mypy complains
            input_message = OpenAIToolMessageParam(content=msg_content, tool_call_id=tool_call_id)  # type: ignore[arg-type]
        else:
            text = str(error_exc) if error_exc else "Tool execution failed"
            input_message = OpenAIToolMessageParam(content=text, tool_call_id=tool_call_id)
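The `has_error` rewrite above combines `getattr` with the walrus operator so a loosely typed (`Any`) tool result can be probed without raising AttributeError or tripping mypy. A self-contained sketch of the pattern (the result classes are illustrative stand-ins, not the stack's types):

from dataclasses import dataclass


@dataclass
class RichResult:
    error_code: int | None = None
    error_message: str | None = None


class BareResult:
    """A result object that exposes no error fields at all."""


def has_error(error_exc: Exception | None, result: object | None) -> bool:
    # getattr(..., None) tolerates objects missing the attribute entirely;
    # the walrus binding lets us test and reuse the value in one expression
    return bool(
        error_exc
        or (
            result
            and (
                ((error_code := getattr(result, "error_code", None)) and error_code > 0)
                or getattr(result, "error_message", None)
            )
        )
    )


assert has_error(None, RichResult(error_code=3))
assert has_error(None, RichResult(error_message="boom"))
assert not has_error(None, BareResult())
assert not has_error(None, None)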
@@ -5,6 +5,7 @@
# the root directory of this source tree.

from dataclasses import dataclass
from typing import cast

from openai.types.chat import ChatCompletionToolParam
from pydantic import BaseModel

@@ -100,17 +101,19 @@ class ToolContext(BaseModel):
            if isinstance(tool, OpenAIResponseToolMCP):
                previous_tools_by_label[tool.server_label] = tool
        # collect tool definitions which are the same in current and previous requests:
        tools_to_process = []
        tools_to_process: list[OpenAIResponseInputTool] = []
        matched: dict[str, OpenAIResponseInputToolMCP] = {}
        for tool in self.current_tools:
        # Mypy confuses OpenAIResponseInputTool (Input union) with OpenAIResponseTool (output union)
        # which differ only in MCP type (InputToolMCP vs ToolMCP). Code is correct.
        for tool in cast(list[OpenAIResponseInputTool], self.current_tools):  # type: ignore[assignment]
            if isinstance(tool, OpenAIResponseInputToolMCP) and tool.server_label in previous_tools_by_label:
                previous_tool = previous_tools_by_label[tool.server_label]
                if previous_tool.allowed_tools == tool.allowed_tools:
                    matched[tool.server_label] = tool
                else:
                    tools_to_process.append(tool)
                    tools_to_process.append(tool)  # type: ignore[arg-type]
            else:
                tools_to_process.append(tool)
                tools_to_process.append(tool)  # type: ignore[arg-type]
        # tools that are not the same or were not previously defined need to be processed:
        self.tools_to_process = tools_to_process
        # for all matched definitions, get the mcp_list_tools objects from the previous output:

@@ -119,9 +122,11 @@ class ToolContext(BaseModel):
        ]
        # reconstruct the tool to server mappings that can be reused:
        for listing in self.previous_tool_listings:
            # listing is OpenAIResponseOutputMessageMCPListTools which has tools: list[MCPListToolsTool]
            definition = matched[listing.server_label]
            for tool in listing.tools:
                self.previous_tools[tool.name] = definition
            for mcp_tool in listing.tools:
                # mcp_tool is MCPListToolsTool which has a name: str field
                self.previous_tools[mcp_tool.name] = definition

    def available_tools(self) -> list[OpenAIResponseTool]:
        if not self.current_tools:

@@ -139,6 +144,8 @@ class ToolContext(BaseModel):
                    server_label=tool.server_label,
                    allowed_tools=tool.allowed_tools,
                )
            # Exhaustive check - all tool types should be handled above
            raise AssertionError(f"Unexpected tool type: {type(tool)}")

        return [convert_tool(tool) for tool in self.current_tools]
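The matching logic above lets a new request reuse MCP list_tools results from a previous response whenever the server label and `allowed_tools` filter are unchanged. A toy sketch of that cache-key idea, using plain dicts in place of the Pydantic models:

def partition_tools(current: list[dict], previous: list[dict]) -> tuple[dict[str, dict], list[dict]]:
    """Return (reusable tools keyed by server_label, tools that must be re-listed)."""
    previous_by_label = {t["server_label"]: t for t in previous if t.get("type") == "mcp"}
    matched: dict[str, dict] = {}
    to_process: list[dict] = []
    for tool in current:
        prev = previous_by_label.get(tool.get("server_label", ""))
        # Reuse only when the allowed_tools filter is identical; otherwise re-list
        if prev is not None and prev.get("allowed_tools") == tool.get("allowed_tools"):
            matched[tool["server_label"]] = tool
        else:
            to_process.append(tool)
    return matched, to_process


prev = [{"type": "mcp", "server_label": "fs", "allowed_tools": ["read"]}]
cur = [
    {"type": "mcp", "server_label": "fs", "allowed_tools": ["read"]},  # reusable
    {"type": "mcp", "server_label": "fs2", "allowed_tools": None},     # new server
]
matched, todo = partition_tools(cur, prev)
assert "fs" in matched and len(todo) == 1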
@@ -7,6 +7,7 @@
import asyncio
import re
import uuid
from collections.abc import Sequence

from llama_stack.apis.agents.agents import ResponseGuardrailSpec
from llama_stack.apis.agents.openai_responses import (

@@ -71,14 +72,14 @@ async def convert_chat_choice_to_response_message(

    return OpenAIResponseMessage(
        id=message_id or f"msg_{uuid.uuid4()}",
        content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=annotations)],
        content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=list(annotations))],
        status="completed",
        role="assistant",
    )


async def convert_response_content_to_chat_content(
    content: (str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]),
    content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent],
) -> str | list[OpenAIChatCompletionContentPartParam]:
    """
    Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.

@@ -88,7 +89,8 @@ async def convert_response_content_to_chat_content(
    if isinstance(content, str):
        return content

    converted_parts = []
    # Type with union to avoid list invariance issues
    converted_parts: list[OpenAIChatCompletionContentPartParam] = []
    for content_part in content:
        if isinstance(content_part, OpenAIResponseInputMessageContentText):
            converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))

@@ -158,9 +160,11 @@ async def convert_response_input_to_chat_messages(
                ),
            )
            messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call]))
            # Output can be None, use empty string as fallback
            output_content = input_item.output if input_item.output is not None else ""
            messages.append(
                OpenAIToolMessageParam(
                    content=input_item.output,
                    content=output_content,
                    tool_call_id=input_item.id,
                )
            )

@@ -172,7 +176,8 @@ async def convert_response_input_to_chat_messages(
        ):
            # these are handled by the responses impl itself and not pass through to chat completions
            pass
        else:
        elif isinstance(input_item, OpenAIResponseMessage):
            # Narrow type to OpenAIResponseMessage which has content and role attributes
            content = await convert_response_content_to_chat_content(input_item.content)
            message_type = await get_message_type_by_role(input_item.role)
            if message_type is None:

@@ -191,7 +196,8 @@ async def convert_response_input_to_chat_messages(
                    last_user_content = getattr(last_user_msg, "content", None)
                    if last_user_content == content:
                        continue  # Skip duplicate user message
            messages.append(message_type(content=content))
            # Dynamic message type call - different message types have different content expectations
            messages.append(message_type(content=content))  # type: ignore[call-arg,arg-type]
    if len(tool_call_results):
        # Check if unpaired function_call_outputs reference function_calls from previous messages
        if previous_messages:

@@ -237,8 +243,11 @@ async def convert_response_text_to_chat_response_format(
    if text.format["type"] == "json_object":
        return OpenAIResponseFormatJSONObject()
    if text.format["type"] == "json_schema":
        # Assert name exists for json_schema format
        assert text.format.get("name"), "json_schema format requires a name"
        schema_name: str = text.format["name"]  # type: ignore[assignment]
        return OpenAIResponseFormatJSONSchema(
            json_schema=OpenAIJSONSchema(name=text.format["name"], schema=text.format["schema"])
            json_schema=OpenAIJSONSchema(name=schema_name, schema=text.format["schema"])
        )
    raise ValueError(f"Unsupported text format: {text.format}")

@@ -251,7 +260,7 @@ async def get_message_type_by_role(role: str) -> type[OpenAIMessageParam] | None
        "assistant": OpenAIAssistantMessageParam,
        "developer": OpenAIDeveloperMessageParam,
    }
    return role_to_type.get(role)
    return role_to_type.get(role)  # type: ignore[return-value] # Pydantic models use ModelMetaclass


def _extract_citations_from_text(

@@ -320,7 +329,8 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[

    # Look up shields to get their provider_resource_id (actual model ID)
    model_ids = []
    shields_list = await safety_api.routing_table.list_shields()
    # TODO: list_shields not in Safety interface but available at runtime via API routing
    shields_list = await safety_api.routing_table.list_shields()  # type: ignore[attr-defined]

    for guardrail_id in guardrail_ids:
        matching_shields = [shield for shield in shields_list.data if shield.identifier == guardrail_id]

@@ -337,7 +347,9 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[
    for result in response.results:
        if result.flagged:
            message = result.user_message or "Content blocked by safety guardrails"
            flagged_categories = [cat for cat, flagged in result.categories.items() if flagged]
            flagged_categories = (
                [cat for cat, flagged in result.categories.items() if flagged] if result.categories else []
            )
            violation_type = result.metadata.get("violation_type", []) if result.metadata else []

            if flagged_categories:

@@ -347,6 +359,9 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[

            return message

    # No violations found
    return None


def extract_guardrail_ids(guardrails: list | None) -> list[str]:
    """Extract guardrail IDs from guardrails parameter, handling both string IDs and ResponseGuardrailSpec objects."""
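The guardrail changes above repeatedly guard optional fields (`result.categories`, `result.metadata`) before iterating them. A small sketch of that defensive extraction, with a simplified stand-in for the moderation result shape:

from dataclasses import dataclass


@dataclass
class ModerationResult:
    flagged: bool
    user_message: str | None = None
    categories: dict[str, bool] | None = None
    metadata: dict | None = None


def first_violation(results: list[ModerationResult]) -> str | None:
    for result in results:
        if not result.flagged:
            continue
        message = result.user_message or "Content blocked by safety guardrails"
        # Optional fields fall back to empty containers instead of raising on None
        flagged_categories = (
            [cat for cat, flagged in result.categories.items() if flagged] if result.categories else []
        )
        violation_type = result.metadata.get("violation_type", []) if result.metadata else []
        if flagged_categories:
            message += f" (flagged: {', '.join(flagged_categories)})"
        if violation_type:
            message += f" (violation: {violation_type})"
        return message
    return None  # no violations found


print(first_violation([ModerationResult(flagged=True, categories={"hate": True})]))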
@@ -6,7 +6,7 @@

import asyncio

from llama_stack.apis.inference import Message
from llama_stack.apis.inference import OpenAIMessageParam
from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel
from llama_stack.core.telemetry import tracing
from llama_stack.log import get_logger

@@ -31,7 +31,7 @@ class ShieldRunnerMixin:
        self.input_shields = input_shields
        self.output_shields = output_shields

    async def run_multiple_shields(self, messages: list[Message], identifiers: list[str]) -> None:
    async def run_multiple_shields(self, messages: list[OpenAIMessageParam], identifiers: list[str]) -> None:
        async def run_shield_with_span(identifier: str):
            async with tracing.span(f"run_shield_{identifier}"):
                return await self.safety_api.run_shield(
@@ -33,4 +33,5 @@ class AnthropicInferenceAdapter(OpenAIMixin):
        return "https://api.anthropic.com/v1"

    async def list_provider_model_ids(self) -> Iterable[str]:
        return [m.id async for m in AsyncAnthropic(api_key=self.get_api_key()).models.list()]
        api_key = self._get_api_key_from_config_or_provider_data()
        return [m.id async for m in AsyncAnthropic(api_key=api_key).models.list()]
@@ -33,10 +33,11 @@ class DatabricksInferenceAdapter(OpenAIMixin):

    async def list_provider_model_ids(self) -> Iterable[str]:
        # Filter out None values from endpoint names
        api_token = self._get_api_key_from_config_or_provider_data()
        return [
            endpoint.name  # type: ignore[misc]
            for endpoint in WorkspaceClient(
                host=self.config.url, token=self.get_api_key()
                host=self.config.url, token=api_token
            ).serving_endpoints.list()  # TODO: this is not async
        ]
@@ -128,7 +128,9 @@ class LiteLLMOpenAIMixin(
        return schema

    async def _get_params(self, request: ChatCompletionRequest) -> dict:
        input_dict = {}
        from typing import Any

        input_dict: dict[str, Any] = {}

        input_dict["messages"] = [
            await convert_message_to_openai_dict_new(m, download_images=self.download_images) for m in request.messages

@@ -139,30 +141,27 @@ class LiteLLMOpenAIMixin(
                    f"Unsupported response format: {type(fmt)}. Only JsonSchemaResponseFormat is supported."
                )

            fmt = fmt.json_schema
            name = fmt["title"]
            del fmt["title"]
            fmt["additionalProperties"] = False
            # Convert to dict for manipulation
            fmt_dict = dict(fmt.json_schema)
            name = fmt_dict["title"]
            del fmt_dict["title"]
            fmt_dict["additionalProperties"] = False

            # Apply additionalProperties: False recursively to all objects
            fmt = self._add_additional_properties_recursive(fmt)
            fmt_dict = self._add_additional_properties_recursive(fmt_dict)

            input_dict["response_format"] = {
                "type": "json_schema",
                "json_schema": {
                    "name": name,
                    "schema": fmt,
                    "schema": fmt_dict,
                    "strict": self.json_schema_strict,
                },
            }
        if request.tools:
            input_dict["tools"] = [convert_tooldef_to_openai_tool(tool) for tool in request.tools]
        if request.tool_config.tool_choice:
            input_dict["tool_choice"] = (
                request.tool_config.tool_choice.value
                if isinstance(request.tool_config.tool_choice, ToolChoice)
                else request.tool_config.tool_choice
            )
        if request.tool_config and (tool_choice := request.tool_config.tool_choice):
            input_dict["tool_choice"] = tool_choice.value if isinstance(tool_choice, ToolChoice) else tool_choice

        return {
            "model": request.model,

@@ -176,10 +175,10 @@ class LiteLLMOpenAIMixin(
    def get_api_key(self) -> str:
        provider_data = self.get_request_provider_data()
        key_field = self.provider_data_api_key_field
        if provider_data and getattr(provider_data, key_field, None):
            api_key = getattr(provider_data, key_field)
        else:
            api_key = self.api_key_from_config
        if provider_data and key_field and (api_key := getattr(provider_data, key_field, None)):
            return str(api_key)  # type: ignore[no-any-return] # getattr returns Any, can't narrow without runtime type inspection

        api_key = self.api_key_from_config
        if not api_key:
            raise ValueError(
                "API key is not set. Please provide a valid API key in the "

@@ -192,7 +191,13 @@ class LiteLLMOpenAIMixin(
        self,
        params: OpenAIEmbeddingsRequestWithExtraBody,
    ) -> OpenAIEmbeddingsResponse:
        if not self.model_store:
            raise ValueError("Model store is not initialized")

        model_obj = await self.model_store.get_model(params.model)
        if model_obj.provider_resource_id is None:
            raise ValueError(f"Model {params.model} has no provider_resource_id")
        provider_resource_id = model_obj.provider_resource_id

        # Convert input to list if it's a string
        input_list = [params.input] if isinstance(params.input, str) else params.input

@@ -200,7 +205,7 @@ class LiteLLMOpenAIMixin(
        # Call litellm embedding function
        # litellm.drop_params = True
        response = litellm.embedding(
            model=self.get_litellm_model_name(model_obj.provider_resource_id),
            model=self.get_litellm_model_name(provider_resource_id),
            input=input_list,
            api_key=self.get_api_key(),
            api_base=self.api_base,

@@ -217,7 +222,7 @@ class LiteLLMOpenAIMixin(

        return OpenAIEmbeddingsResponse(
            data=data,
            model=model_obj.provider_resource_id,
            model=provider_resource_id,
            usage=usage,
        )

@@ -225,10 +230,16 @@ class LiteLLMOpenAIMixin(
        self,
        params: OpenAICompletionRequestWithExtraBody,
    ) -> OpenAICompletion:
        if not self.model_store:
            raise ValueError("Model store is not initialized")

        model_obj = await self.model_store.get_model(params.model)
        if model_obj.provider_resource_id is None:
            raise ValueError(f"Model {params.model} has no provider_resource_id")
        provider_resource_id = model_obj.provider_resource_id

        request_params = await prepare_openai_completion_params(
            model=self.get_litellm_model_name(model_obj.provider_resource_id),
            model=self.get_litellm_model_name(provider_resource_id),
            prompt=params.prompt,
            best_of=params.best_of,
            echo=params.echo,

@@ -249,7 +260,8 @@ class LiteLLMOpenAIMixin(
            api_key=self.get_api_key(),
            api_base=self.api_base,
        )
        return await litellm.atext_completion(**request_params)
        # LiteLLM returns compatible type but mypy can't verify external library
        return await litellm.atext_completion(**request_params)  # type: ignore[no-any-return] # external lib lacks type stubs

    async def openai_chat_completion(
        self,

@@ -265,10 +277,16 @@ class LiteLLMOpenAIMixin(
        elif "include_usage" not in stream_options:
            stream_options = {**stream_options, "include_usage": True}

        if not self.model_store:
            raise ValueError("Model store is not initialized")

        model_obj = await self.model_store.get_model(params.model)
        if model_obj.provider_resource_id is None:
            raise ValueError(f"Model {params.model} has no provider_resource_id")
        provider_resource_id = model_obj.provider_resource_id

        request_params = await prepare_openai_completion_params(
            model=self.get_litellm_model_name(model_obj.provider_resource_id),
            model=self.get_litellm_model_name(provider_resource_id),
            messages=params.messages,
            frequency_penalty=params.frequency_penalty,
            function_call=params.function_call,

@@ -294,7 +312,8 @@ class LiteLLMOpenAIMixin(
            api_key=self.get_api_key(),
            api_base=self.api_base,
        )
        return await litellm.acompletion(**request_params)
        # LiteLLM returns compatible type but mypy can't verify external library
        return await litellm.acompletion(**request_params)  # type: ignore[no-any-return] # external lib lacks type stubs

    async def check_model_availability(self, model: str) -> bool:
        """
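The `get_api_key` rewrite above prefers a per-request key from provider data and falls back to the static config key, raising only when neither exists. A compact sketch of that resolution order (the field name and classes are illustrative):

from dataclasses import dataclass


@dataclass
class ProviderData:
    together_api_key: str | None = None


def resolve_api_key(
    provider_data: ProviderData | None,
    key_field: str,
    api_key_from_config: str | None,
) -> str:
    # Per-request provider data wins when it actually carries a value
    if provider_data and key_field and (api_key := getattr(provider_data, key_field, None)):
        return str(api_key)
    if not api_key_from_config:
        raise ValueError("API key is not set in config or request provider data")
    return api_key_from_config


assert resolve_api_key(ProviderData("req-key"), "together_api_key", "cfg-key") == "req-key"
assert resolve_api_key(None, "together_api_key", "cfg-key") == "cfg-key"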
@@ -161,8 +161,10 @@ def get_sampling_strategy_options(params: SamplingParams) -> dict:
    if isinstance(params.strategy, GreedySamplingStrategy):
        options["temperature"] = 0.0
    elif isinstance(params.strategy, TopPSamplingStrategy):
        options["temperature"] = params.strategy.temperature
        options["top_p"] = params.strategy.top_p
        if params.strategy.temperature is not None:
            options["temperature"] = params.strategy.temperature
        if params.strategy.top_p is not None:
            options["top_p"] = params.strategy.top_p
    elif isinstance(params.strategy, TopKSamplingStrategy):
        options["top_k"] = params.strategy.top_k
    else:

@@ -192,12 +194,12 @@ def get_sampling_options(params: SamplingParams | None) -> dict:

def text_from_choice(choice) -> str:
    if hasattr(choice, "delta") and choice.delta:
        return choice.delta.content
        return choice.delta.content  # type: ignore[no-any-return] # external OpenAI types lack precise annotations

    if hasattr(choice, "message"):
        return choice.message.content
        return choice.message.content  # type: ignore[no-any-return] # external OpenAI types lack precise annotations

    return choice.text
    return choice.text  # type: ignore[no-any-return] # external OpenAI types lack precise annotations


def get_stop_reason(finish_reason: str) -> StopReason:

@@ -216,7 +218,7 @@ def convert_openai_completion_logprobs(
) -> list[TokenLogProbs] | None:
    if not logprobs:
        return None
    if hasattr(logprobs, "top_logprobs"):
    if hasattr(logprobs, "top_logprobs") and logprobs.top_logprobs:
        return [TokenLogProbs(logprobs_by_token=x) for x in logprobs.top_logprobs]

    # Together supports logprobs with top_k=1 only. This means for each token position,

@@ -236,7 +238,7 @@ def convert_openai_completion_logprobs_stream(text: str, logprobs: float | OpenA
    if isinstance(logprobs, float):
        # Adapt response from Together CompletionChoicesChunk
        return [TokenLogProbs(logprobs_by_token={text: logprobs})]
    if hasattr(logprobs, "top_logprobs"):
    if hasattr(logprobs, "top_logprobs") and logprobs.top_logprobs:
        return [TokenLogProbs(logprobs_by_token=x) for x in logprobs.top_logprobs]
    return None

@@ -245,23 +247,24 @@ def process_completion_response(
    response: OpenAICompatCompletionResponse,
) -> CompletionResponse:
    choice = response.choices[0]
    text = choice.text or ""
    # drop suffix <eot_id> if present and return stop reason as end of turn
    if choice.text.endswith("<|eot_id|>"):
    if text.endswith("<|eot_id|>"):
        return CompletionResponse(
            stop_reason=StopReason.end_of_turn,
            content=choice.text[: -len("<|eot_id|>")],
            content=text[: -len("<|eot_id|>")],
            logprobs=convert_openai_completion_logprobs(choice.logprobs),
        )
    # drop suffix <eom_id> if present and return stop reason as end of message
    if choice.text.endswith("<|eom_id|>"):
    if text.endswith("<|eom_id|>"):
        return CompletionResponse(
            stop_reason=StopReason.end_of_message,
            content=choice.text[: -len("<|eom_id|>")],
            content=text[: -len("<|eom_id|>")],
            logprobs=convert_openai_completion_logprobs(choice.logprobs),
        )
    return CompletionResponse(
        stop_reason=get_stop_reason(choice.finish_reason),
        content=choice.text,
        stop_reason=get_stop_reason(choice.finish_reason or "stop"),
        content=text,
        logprobs=convert_openai_completion_logprobs(choice.logprobs),
    )
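The `process_completion_response` fix above normalizes `choice.text` to a string once, then strips Llama's end-of-turn and end-of-message suffixes. A minimal sketch of that suffix handling (the enum is a stand-in for the stack's StopReason, and the fallback reason is illustrative; the real code derives it from the provider's finish_reason):

from enum import Enum


class StopReason(Enum):
    end_of_turn = "end_of_turn"
    end_of_message = "end_of_message"
    out_of_tokens = "out_of_tokens"


def strip_terminator(raw_text: str | None) -> tuple[str, StopReason]:
    text = raw_text or ""  # providers may return None for empty completions
    for suffix, reason in (
        ("<|eot_id|>", StopReason.end_of_turn),
        ("<|eom_id|>", StopReason.end_of_message),
    ):
        if text.endswith(suffix):
            return text[: -len(suffix)], reason
    return text, StopReason.out_of_tokens


assert strip_terminator("hello<|eot_id|>") == ("hello", StopReason.end_of_turn)
assert strip_terminator(None) == ("", StopReason.out_of_tokens)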
@ -272,10 +275,10 @@ def process_chat_completion_response(
|
|||
) -> ChatCompletionResponse:
|
||||
choice = response.choices[0]
|
||||
if choice.finish_reason == "tool_calls":
|
||||
if not choice.message or not choice.message.tool_calls:
|
||||
if not hasattr(choice, "message") or not choice.message or not choice.message.tool_calls: # type: ignore[attr-defined] # OpenAICompatCompletionChoice is runtime duck-typed
|
||||
raise ValueError("Tool calls are not present in the response")
|
||||
|
||||
tool_calls = [convert_tool_call(tool_call) for tool_call in choice.message.tool_calls]
|
||||
tool_calls = [convert_tool_call(tool_call) for tool_call in choice.message.tool_calls] # type: ignore[attr-defined] # OpenAICompatCompletionChoice is runtime duck-typed
|
||||
if any(isinstance(tool_call, UnparseableToolCall) for tool_call in tool_calls):
|
||||
# If we couldn't parse a tool call, jsonify the tool calls and return them
|
||||
return ChatCompletionResponse(
|
||||
|
|
@ -287,9 +290,11 @@ def process_chat_completion_response(
|
|||
)
|
||||
else:
|
||||
# Otherwise, return tool calls as normal
|
||||
# Filter to only valid ToolCall objects
|
||||
valid_tool_calls = [tc for tc in tool_calls if isinstance(tc, ToolCall)]
|
||||
return ChatCompletionResponse(
|
||||
completion_message=CompletionMessage(
|
||||
tool_calls=tool_calls,
|
||||
tool_calls=valid_tool_calls,
|
||||
stop_reason=StopReason.end_of_turn,
|
||||
# Content is not optional
|
||||
content="",
|
||||
|
|
@ -299,7 +304,7 @@ def process_chat_completion_response(
|
|||
|
||||
# TODO: This does not work well with tool calls for vLLM remote provider
|
||||
# Ref: https://github.com/meta-llama/llama-stack/issues/1058
|
||||
raw_message = decode_assistant_message(text_from_choice(choice), get_stop_reason(choice.finish_reason))
|
||||
raw_message = decode_assistant_message(text_from_choice(choice), get_stop_reason(choice.finish_reason or "stop"))
|
||||
|
||||
# NOTE: If we do not set tools in chat-completion request, we should not
|
||||
# expect the ToolCall in the response. Instead, we should return the raw
|
||||
|
|
@ -324,8 +329,8 @@ def process_chat_completion_response(
|
|||
|
||||
return ChatCompletionResponse(
|
||||
completion_message=CompletionMessage(
|
||||
content=raw_message.content,
|
||||
stop_reason=raw_message.stop_reason,
|
||||
content=raw_message.content, # type: ignore[arg-type] # decode_assistant_message returns Union[str, InterleavedContent]
|
||||
stop_reason=raw_message.stop_reason or StopReason.end_of_turn,
|
||||
tool_calls=raw_message.tool_calls,
|
||||
),
|
||||
logprobs=None,
|
||||
|
|
@ -448,7 +453,7 @@ async def process_chat_completion_stream_response(
|
|||
)
|
||||
|
||||
# parse tool calls and report errors
|
||||
message = decode_assistant_message(buffer, stop_reason)
|
||||
message = decode_assistant_message(buffer, stop_reason or StopReason.end_of_turn)
|
||||
|
||||
parsed_tool_calls = len(message.tool_calls) > 0
|
||||
if ipython and not parsed_tool_calls:
|
||||
|
|
@ -463,7 +468,7 @@ async def process_chat_completion_stream_response(
|
|||
)
|
||||
)
|
||||
|
||||
request_tools = {t.tool_name: t for t in request.tools}
|
||||
request_tools = {t.tool_name: t for t in (request.tools or [])}
|
||||
for tool_call in message.tool_calls:
|
||||
if tool_call.tool_name in request_tools:
|
||||
yield ChatCompletionResponseStreamChunk(
|
||||
|
|
@ -525,7 +530,7 @@ async def convert_message_to_openai_dict(message: Message, download: bool = Fals
|
|||
}
|
||||
|
||||
if hasattr(message, "tool_calls") and message.tool_calls:
|
||||
result["tool_calls"] = []
|
||||
tool_calls_list = []
|
||||
for tc in message.tool_calls:
|
||||
# The tool.tool_name can be a str or a BuiltinTool enum. If
|
||||
# it's the latter, convert to a string.
|
||||
|
|
@ -533,7 +538,7 @@ async def convert_message_to_openai_dict(message: Message, download: bool = Fals
|
|||
if isinstance(tool_name, BuiltinTool):
|
||||
tool_name = tool_name.value
|
||||
|
||||
result["tool_calls"].append(
|
||||
tool_calls_list.append(
|
||||
{
|
||||
"id": tc.call_id,
|
||||
"type": "function",
|
||||
|
|
@ -543,6 +548,7 @@ async def convert_message_to_openai_dict(message: Message, download: bool = Fals
|
|||
},
|
||||
}
|
||||
)
|
||||
result["tool_calls"] = tool_calls_list # type: ignore[assignment] # dict allows Any value, stricter type expected
|
||||
return result
|
||||
|
||||
|
||||
|
|
@ -608,7 +614,7 @@ async def convert_message_to_openai_dict_new(
|
|||
),
|
||||
)
|
||||
elif isinstance(content_, list):
|
||||
return [await impl(item) for item in content_]
|
||||
return [await impl(item) for item in content_] # type: ignore[misc] # recursive list comprehension confuses mypy's type narrowing
|
||||
else:
|
||||
raise ValueError(f"Unsupported content type: {type(content_)}")
|
||||
|
||||
|
|
@ -620,7 +626,7 @@ async def convert_message_to_openai_dict_new(
|
|||
else:
|
||||
return [ret]
|
||||
|
||||
out: OpenAIChatCompletionMessage = None
|
||||
out: OpenAIChatCompletionMessage
|
||||
if isinstance(message, UserMessage):
|
||||
out = OpenAIChatCompletionUserMessage(
|
||||
role="user",
|
||||
|
|
@ -636,7 +642,7 @@ async def convert_message_to_openai_dict_new(
|
|||
),
|
||||
type="function",
|
||||
)
|
||||
for tool in message.tool_calls
|
||||
for tool in (message.tool_calls or [])
|
||||
]
|
||||
params = {}
|
||||
if tool_calls:
|
||||
|
|
@ -644,18 +650,18 @@ async def convert_message_to_openai_dict_new(
|
|||
out = OpenAIChatCompletionAssistantMessage(
|
||||
role="assistant",
|
||||
content=await _convert_message_content(message.content),
|
||||
**params,
|
||||
**params, # type: ignore[typeddict-item] # tool_calls dict expansion conflicts with TypedDict optional field
|
||||
)
|
||||
elif isinstance(message, ToolResponseMessage):
|
||||
out = OpenAIChatCompletionToolMessage(
|
||||
role="tool",
|
||||
tool_call_id=message.call_id,
|
||||
content=await _convert_message_content(message.content),
|
||||
content=await _convert_message_content(message.content), # type: ignore[typeddict-item] # content union type incompatible with TypedDict str requirement
|
||||
)
|
||||
elif isinstance(message, SystemMessage):
|
||||
out = OpenAIChatCompletionSystemMessage(
|
||||
role="system",
|
||||
content=await _convert_message_content(message.content),
|
||||
content=await _convert_message_content(message.content), # type: ignore[typeddict-item] # content union type incompatible with TypedDict str requirement
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unsupported message type: {type(message)}")
|
||||
|
|
@@ -758,16 +764,16 @@ def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict:
    function = out["function"]

    if isinstance(tool.tool_name, BuiltinTool):
-        function["name"] = tool.tool_name.value
+        function["name"] = tool.tool_name.value # type: ignore[index] # dict value inferred as Any but mypy sees Collection[str]
    else:
-        function["name"] = tool.tool_name
+        function["name"] = tool.tool_name # type: ignore[index] # dict value inferred as Any but mypy sees Collection[str]

    if tool.description:
-        function["description"] = tool.description
+        function["description"] = tool.description # type: ignore[index] # dict value inferred as Any but mypy sees Collection[str]

    if tool.input_schema:
        # Pass through the entire JSON Schema as-is
-        function["parameters"] = tool.input_schema
+        function["parameters"] = tool.input_schema # type: ignore[index] # dict value inferred as Any but mypy sees Collection[str]

    # NOTE: OpenAI does not support output_schema, so we drop it here
    # It's stored in LlamaStack for validation and other provider usage
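
For orientation, the dict these assignments build up is the standard OpenAI function-tool payload. A minimal sketch of the end result, using the knowledge_search tool that appears in the recorded test requests further down:

# Shape targeted by convert_tooldef_to_openai_tool; values mirror the
# knowledge_search tool from the recorded test requests below.
openai_tool = {
    "type": "function",
    "function": {
        "name": "knowledge_search",
        "description": "Search for information in a database.",
        "parameters": {  # the tool's input_schema, passed through as-is
            "type": "object",
            "properties": {"query": {"type": "string"}},
            "required": ["query"],
        },
    },
}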
@@ -815,15 +821,15 @@ def _convert_openai_request_tool_config(tool_choice: str | dict[str, Any] | None
    tool_config = ToolConfig()
    if tool_choice:
        try:
-            tool_choice = ToolChoice(tool_choice)
+            tool_choice = ToolChoice(tool_choice) # type: ignore[assignment] # reassigning to enum narrows union but mypy can't track after exception
        except ValueError:
            pass
-        tool_config.tool_choice = tool_choice
+        tool_config.tool_choice = tool_choice # type: ignore[assignment] # ToolConfig.tool_choice accepts Union[ToolChoice, dict] but mypy tracks narrower type
    return tool_config


def _convert_openai_request_tools(tools: list[dict[str, Any]] | None = None) -> list[ToolDefinition]:
-    lls_tools = []
+    lls_tools: list[ToolDefinition] = []
    if not tools:
        return lls_tools
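
A standalone sketch of the coercion pattern in this hunk (the enum below is a stand-in for llama_stack's ToolChoice): strings that match an enum value are narrowed, and anything else, such as a named-tool dict, falls through untouched.

from enum import Enum

class ToolChoiceSketch(Enum):  # stand-in for llama_stack's ToolChoice
    auto = "auto"
    required = "required"
    none = "none"

def coerce_tool_choice(tool_choice):
    try:
        return ToolChoiceSketch(tool_choice)  # known strings become enum members
    except ValueError:
        return tool_choice  # e.g. a {"type": "function", ...} dict passes through

assert coerce_tool_choice("auto") is ToolChoiceSketch.auto
assert coerce_tool_choice({"type": "function"}) == {"type": "function"}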
@@ -843,16 +849,16 @@ def _convert_openai_request_tools(tools: list[dict[str, Any]] | None = None) ->


def _convert_openai_request_response_format(
-    response_format: OpenAIResponseFormatParam = None,
+    response_format: OpenAIResponseFormatParam | None = None,
):
    if not response_format:
        return None
    # response_format can be a dict or a pydantic model
-    response_format = dict(response_format)
-    if response_format.get("type", "") == "json_schema":
+    response_format_dict = dict(response_format) # type: ignore[arg-type] # OpenAIResponseFormatParam union needs dict conversion
+    if response_format_dict.get("type", "") == "json_schema":
        return JsonSchemaResponseFormat(
-            type="json_schema",
-            json_schema=response_format.get("json_schema", {}).get("schema", ""),
+            type="json_schema", # type: ignore[arg-type] # Literal["json_schema"] incompatible with expected type
+            json_schema=response_format_dict.get("json_schema", {}).get("schema", ""),
        )
    return None
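
A standalone sketch of the normalization this hunk performs, with the JsonSchemaResponseFormat construction replaced by returning the extracted schema dict; the example payload mirrors OpenAI's json_schema response_format shape.

def extract_json_schema(response_format: dict | None):
    # Normalize to a plain dict first (the real code also accepts a pydantic
    # model), then pull the nested schema out of the json_schema wrapper.
    if not response_format:
        return None
    response_format_dict = dict(response_format)
    if response_format_dict.get("type", "") == "json_schema":
        return response_format_dict.get("json_schema", {}).get("schema", "")
    return None

fmt = {"type": "json_schema", "json_schema": {"name": "answer", "schema": {"type": "object"}}}
assert extract_json_schema(fmt) == {"type": "object"}
assert extract_json_schema({"type": "text"}) is None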
@@ -938,16 +944,15 @@ def _convert_openai_sampling_params(

    # Map an explicit temperature of 0 to greedy sampling
    if temperature == 0:
-        strategy = GreedySamplingStrategy()
+        sampling_params.strategy = GreedySamplingStrategy()
    else:
        # OpenAI defaults to 1.0 for temperature and top_p if unset
        if temperature is None:
            temperature = 1.0
        if top_p is None:
            top_p = 1.0
-        strategy = TopPSamplingStrategy(temperature=temperature, top_p=top_p)
+        sampling_params.strategy = TopPSamplingStrategy(temperature=temperature, top_p=top_p) # type: ignore[assignment] # SamplingParams.strategy union accepts this type

-    sampling_params.strategy = strategy
    return sampling_params
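
The mapping above in one runnable sketch, with dataclasses standing in for GreedySamplingStrategy and TopPSamplingStrategy:

from dataclasses import dataclass

@dataclass
class Greedy:  # stand-in for GreedySamplingStrategy
    pass

@dataclass
class TopP:  # stand-in for TopPSamplingStrategy
    temperature: float
    top_p: float

def pick_strategy(temperature: float | None, top_p: float | None):
    if temperature == 0:  # an explicit 0 means greedy decoding
        return Greedy()
    # OpenAI defaults both knobs to 1.0 when unset
    return TopP(temperature if temperature is not None else 1.0,
                top_p if top_p is not None else 1.0)

assert pick_strategy(0, None) == Greedy()
assert pick_strategy(None, 0.9) == TopP(temperature=1.0, top_p=0.9)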
@@ -957,23 +962,24 @@ def openai_messages_to_messages(
    """
    Convert a list of OpenAIChatCompletionMessage into a list of Message.
    """
-    converted_messages = []
+    converted_messages: list[Message] = []
    for message in messages:
+        converted_message: Message
        if message.role == "system":
-            converted_message = SystemMessage(content=openai_content_to_content(message.content))
+            converted_message = SystemMessage(content=openai_content_to_content(message.content)) # type: ignore[arg-type] # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
        elif message.role == "user":
-            converted_message = UserMessage(content=openai_content_to_content(message.content))
+            converted_message = UserMessage(content=openai_content_to_content(message.content)) # type: ignore[arg-type] # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
        elif message.role == "assistant":
            converted_message = CompletionMessage(
-                content=openai_content_to_content(message.content),
-                tool_calls=_convert_openai_tool_calls(message.tool_calls),
+                content=openai_content_to_content(message.content), # type: ignore[arg-type] # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
+                tool_calls=_convert_openai_tool_calls(message.tool_calls) if message.tool_calls else [], # type: ignore[arg-type] # OpenAI tool_calls type incompatible with conversion function
                stop_reason=StopReason.end_of_turn,
            )
        elif message.role == "tool":
            converted_message = ToolResponseMessage(
                role="tool",
                call_id=message.tool_call_id,
-                content=openai_content_to_content(message.content),
+                content=openai_content_to_content(message.content), # type: ignore[arg-type] # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
            )
        else:
            raise ValueError(f"Unknown role {message.role}")
@@ -990,9 +996,9 @@ def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionConten
        return [openai_content_to_content(c) for c in content]
    elif hasattr(content, "type"):
        if content.type == "text":
-            return TextContentItem(type="text", text=content.text)
+            return TextContentItem(type="text", text=content.text) # type: ignore[attr-defined] # Iterable narrowed by hasattr check but mypy doesn't track
        elif content.type == "image_url":
-            return ImageContentItem(type="image", image=_URLOrData(url=URL(uri=content.image_url.url)))
+            return ImageContentItem(type="image", image=_URLOrData(url=URL(uri=content.image_url.url))) # type: ignore[attr-defined] # Iterable narrowed by hasattr check but mypy doesn't track
        else:
            raise ValueError(f"Unknown content type: {content.type}")
    else:
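
A standalone sketch of the dispatch in openai_content_to_content, with plain dicts standing in for TextContentItem and ImageContentItem; the final rejection branch is an assumption added here for completeness.

def to_content(content):
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        return [to_content(c) for c in content]  # recurse over multi-part content
    if hasattr(content, "type"):  # duck-typed OpenAI content part
        if content.type == "text":
            return {"type": "text", "text": content.text}
        if content.type == "image_url":
            return {"type": "image", "url": content.image_url.url}
        raise ValueError(f"Unknown content type: {content.type}")
    raise TypeError(f"Unsupported content: {type(content)}")  # assumption: reject anything else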
@@ -1041,9 +1047,9 @@ def convert_openai_chat_completion_choice(
        completion_message=CompletionMessage(
            content=choice.message.content or "",  # CompletionMessage content is not optional
            stop_reason=_convert_openai_finish_reason(choice.finish_reason),
-            tool_calls=_convert_openai_tool_calls(choice.message.tool_calls),
+            tool_calls=_convert_openai_tool_calls(choice.message.tool_calls) if choice.message.tool_calls else [], # type: ignore[arg-type] # OpenAI tool_calls Optional type broadens union
        ),
-        logprobs=_convert_openai_logprobs(getattr(choice, "logprobs", None)),
+        logprobs=_convert_openai_logprobs(getattr(choice, "logprobs", None)), # type: ignore[arg-type] # getattr returns Any, can't narrow without inspection
    )
@@ -1070,7 +1076,7 @@ async def convert_openai_chat_completion_stream(
        choice = chunk.choices[0]  # assuming only one choice per chunk

        # we assume there's only one finish_reason in the stream
-        stop_reason = _convert_openai_finish_reason(choice.finish_reason) or stop_reason
+        stop_reason = _convert_openai_finish_reason(choice.finish_reason) if choice.finish_reason else stop_reason
        logprobs = getattr(choice, "logprobs", None)

        # if there's a tool call, emit an event for each tool in the list
@@ -1083,7 +1089,7 @@ async def convert_openai_chat_completion_stream(
                event=ChatCompletionResponseEvent(
                    event_type=event_type,
                    delta=TextDelta(text=choice.delta.content),
-                    logprobs=_convert_openai_logprobs(logprobs),
+                    logprobs=_convert_openai_logprobs(logprobs), # type: ignore[arg-type] # logprobs type broadened from getattr result
                )
            )
@@ -1101,10 +1107,10 @@ async def convert_openai_chat_completion_stream(
                    event=ChatCompletionResponseEvent(
                        event_type=event_type,
                        delta=ToolCallDelta(
-                            tool_call=_convert_openai_tool_calls([tool_call])[0],
+                            tool_call=_convert_openai_tool_calls([tool_call])[0], # type: ignore[arg-type, list-item] # delta tool_call type differs from complete tool_call
                            parse_status=ToolCallParseStatus.succeeded,
                        ),
-                        logprobs=_convert_openai_logprobs(logprobs),
+                        logprobs=_convert_openai_logprobs(logprobs), # type: ignore[arg-type] # logprobs type broadened from getattr result
                    )
                )
            else:
@@ -1125,12 +1131,15 @@ async def convert_openai_chat_completion_stream(
                if tool_call.function.name:
                    buffer["name"] = tool_call.function.name
                    delta = f"{buffer['name']}("
-                    buffer["content"] += delta
+                    if buffer["content"] is not None:
+                        buffer["content"] += delta

                if tool_call.function.arguments:
                    delta = tool_call.function.arguments
-                    buffer["arguments"] += delta
-                    buffer["content"] += delta
+                    if buffer["arguments"] is not None and delta:
+                        buffer["arguments"] += delta
+                    if buffer["content"] is not None and delta:
+                        buffer["content"] += delta

                yield ChatCompletionResponseStreamChunk(
                    event=ChatCompletionResponseEvent(
@@ -1139,7 +1148,7 @@ async def convert_openai_chat_completion_stream(
                        tool_call=delta,
                        parse_status=ToolCallParseStatus.in_progress,
                    ),
-                    logprobs=_convert_openai_logprobs(logprobs),
+                    logprobs=_convert_openai_logprobs(logprobs), # type: ignore[arg-type] # logprobs type broadened from getattr result
                )
            )
        elif choice.delta.content:
@@ -1147,7 +1156,7 @@ async def convert_openai_chat_completion_stream(
                event=ChatCompletionResponseEvent(
                    event_type=event_type,
                    delta=TextDelta(text=choice.delta.content or ""),
-                    logprobs=_convert_openai_logprobs(logprobs),
+                    logprobs=_convert_openai_logprobs(logprobs), # type: ignore[arg-type] # logprobs type broadened from getattr result
                )
            )
@@ -1155,7 +1164,8 @@ async def convert_openai_chat_completion_stream(
        logger.debug(f"toolcall_buffer[{idx}]: {buffer}")
        if buffer["name"]:
            delta = ")"
-            buffer["content"] += delta
+            if buffer["content"] is not None:
+                buffer["content"] += delta
            yield ChatCompletionResponseStreamChunk(
                event=ChatCompletionResponseEvent(
                    event_type=event_type,
@@ -1168,16 +1178,16 @@ async def convert_openai_chat_completion_stream(
            )

        try:
-            tool_call = ToolCall(
-                call_id=buffer["call_id"],
-                tool_name=buffer["name"],
-                arguments=buffer["arguments"],
+            parsed_tool_call = ToolCall(
+                call_id=buffer["call_id"] or "",
+                tool_name=buffer["name"] or "",
+                arguments=buffer["arguments"] or "",
            )
            yield ChatCompletionResponseStreamChunk(
                event=ChatCompletionResponseEvent(
                    event_type=ChatCompletionResponseEventType.progress,
                    delta=ToolCallDelta(
-                        tool_call=tool_call,
+                        tool_call=parsed_tool_call, # type: ignore[arg-type] # ToolCallDelta.tool_call accepts Union[str, ToolCall]
                        parse_status=ToolCallParseStatus.succeeded,
                    ),
                    stop_reason=stop_reason,
@@ -1189,7 +1199,7 @@ async def convert_openai_chat_completion_stream(
                event=ChatCompletionResponseEvent(
                    event_type=ChatCompletionResponseEventType.progress,
                    delta=ToolCallDelta(
-                        tool_call=buffer["content"],
+                        tool_call=buffer["content"], # type: ignore[arg-type] # ToolCallDelta.tool_call accepts Union[str, ToolCall]
                        parse_status=ToolCallParseStatus.failed,
                    ),
                    stop_reason=stop_reason,
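
The tool-call buffering these hunks guard can be sketched standalone; get_weather and its arguments are made up for illustration. Name fragments open the printable form, argument fragments append to both fields, and the closing paren lands once the stream reports a finish reason:

buffer = {"call_id": "call_1", "name": None, "arguments": "", "content": ""}

def on_tool_delta(name, arguments):
    if name:
        buffer["name"] = name
        if buffer["content"] is not None:
            buffer["content"] += f"{name}("
    if arguments:
        if buffer["arguments"] is not None:
            buffer["arguments"] += arguments
        if buffer["content"] is not None:
            buffer["content"] += arguments

on_tool_delta("get_weather", None)   # hypothetical tool name
on_tool_delta(None, '{"city": ')
on_tool_delta(None, '"Paris"}')
if buffer["name"]:                   # stream ended
    buffer["content"] += ")"
assert buffer["content"] == 'get_weather({"city": "Paris"})'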
@@ -1250,7 +1260,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
        top_p: float | None = None,
        user: str | None = None,
    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        messages = openai_messages_to_messages(messages)
+        messages = openai_messages_to_messages(messages) # type: ignore[assignment] # converted from OpenAI to LlamaStack message format
        response_format = _convert_openai_request_response_format(response_format)
        sampling_params = _convert_openai_sampling_params(
            max_tokens=max_tokens,
@@ -1259,15 +1269,15 @@ class OpenAIChatCompletionToLlamaStackMixin:
        )
        tool_config = _convert_openai_request_tool_config(tool_choice)

-        tools = _convert_openai_request_tools(tools)
+        tools = _convert_openai_request_tools(tools) # type: ignore[assignment] # converted from OpenAI to LlamaStack tool format
        if tool_config.tool_choice == ToolChoice.none:
-            tools = []
+            tools = [] # type: ignore[assignment] # empty list narrows return type but mypy tracks broader type

        outstanding_responses = []
        # "n" is the number of completions to generate per prompt
        n = n or 1
        for _i in range(0, n):
-            response = self.chat_completion(
+            response = self.chat_completion( # type: ignore[attr-defined] # mixin expects class to implement chat_completion
                model_id=model,
                messages=messages,
                sampling_params=sampling_params,
@@ -1279,7 +1289,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
            outstanding_responses.append(response)

        if stream:
-            return OpenAIChatCompletionToLlamaStackMixin._process_stream_response(self, model, outstanding_responses)
+            return OpenAIChatCompletionToLlamaStackMixin._process_stream_response(self, model, outstanding_responses) # type: ignore[no-any-return] # mixin async generator return type too complex for mypy

        return await OpenAIChatCompletionToLlamaStackMixin._process_non_stream_response(
            self, model, outstanding_responses
@@ -1295,14 +1305,16 @@ class OpenAIChatCompletionToLlamaStackMixin:
            response = await outstanding_response
            async for chunk in response:
                event = chunk.event
-                finish_reason = _convert_stop_reason_to_openai_finish_reason(event.stop_reason)
+                finish_reason = (
+                    _convert_stop_reason_to_openai_finish_reason(event.stop_reason) if event.stop_reason else None
+                )

                if isinstance(event.delta, TextDelta):
                    text_delta = event.delta.text
                    delta = OpenAIChoiceDelta(content=text_delta)
                    yield OpenAIChatCompletionChunk(
                        id=id,
-                        choices=[OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)],
+                        choices=[OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)], # type: ignore[arg-type] # finish_reason Optional[str] incompatible with Literal union
                        created=int(time.time()),
                        model=model,
                        object="chat.completion.chunk",
@@ -1310,13 +1322,17 @@ class OpenAIChatCompletionToLlamaStackMixin:
                elif isinstance(event.delta, ToolCallDelta):
                    if event.delta.parse_status == ToolCallParseStatus.succeeded:
                        tool_call = event.delta.tool_call
+                        if isinstance(tool_call, str):
+                            continue

                        # First chunk includes full structure
                        openai_tool_call = OpenAIChoiceDeltaToolCall(
                            index=0,
                            id=tool_call.call_id,
                            function=OpenAIChoiceDeltaToolCallFunction(
-                                name=tool_call.tool_name,
+                                name=tool_call.tool_name
+                                if isinstance(tool_call.tool_name, str)
+                                else tool_call.tool_name.value, # type: ignore[arg-type] # enum .value extraction on Union confuses mypy
                                arguments="",
                            ),
                        )
@@ -1324,7 +1340,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
                        yield OpenAIChatCompletionChunk(
                            id=id,
                            choices=[
-                                OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)
+                                OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) # type: ignore[arg-type] # finish_reason Optional[str] incompatible with Literal union
                            ],
                            created=int(time.time()),
                            model=model,
@@ -1341,7 +1357,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
                yield OpenAIChatCompletionChunk(
                    id=id,
                    choices=[
-                        OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)
+                        OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) # type: ignore[arg-type] # finish_reason Optional[str] incompatible with Literal union
                    ],
                    created=int(time.time()),
                    model=model,
@@ -1351,7 +1367,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
    async def _process_non_stream_response(
        self, model: str, outstanding_responses: list[Awaitable[ChatCompletionResponse]]
    ) -> OpenAIChatCompletion:
-        choices = []
+        choices: list[OpenAIChatCompletionChoice] = []
        for outstanding_response in outstanding_responses:
            response = await outstanding_response
            completion_message = response.completion_message
@@ -1360,14 +1376,14 @@ class OpenAIChatCompletionToLlamaStackMixin:

        choice = OpenAIChatCompletionChoice(
            index=len(choices),
-            message=message,
+            message=message, # type: ignore[arg-type] # OpenAIChatCompletionMessage union incompatible with narrower Message type
            finish_reason=finish_reason,
        )
-        choices.append(choice)
+        choices.append(choice) # type: ignore[arg-type] # OpenAIChatCompletionChoice type annotation mismatch

        return OpenAIChatCompletion(
            id=f"chatcmpl-{uuid.uuid4()}",
-            choices=choices,
+            choices=choices, # type: ignore[arg-type] # list[OpenAIChatCompletionChoice] union incompatible
            created=int(time.time()),
            model=model,
            object="chat.completion",
@@ -196,6 +196,7 @@ def make_overlapped_chunks(
        chunks.append(
            Chunk(
                content=chunk,
+                chunk_id=chunk_id,
                metadata=chunk_metadata,
                chunk_metadata=backend_chunk_metadata,
            )
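
The surrounding function name suggests sliding-window chunking over tokens; a minimal sketch under that assumption (the window and overlap parameters are illustrative, not taken from the implementation):

def overlapped_windows(tokens: list[str], window: int, overlap: int) -> list[list[str]]:
    # Each chunk starts where the previous one ends, minus the overlap.
    step = window - overlap
    return [tokens[i : i + window] for i in range(0, len(tokens), step)]

assert overlapped_windows(list("abcdef"), window=4, overlap=2) == [
    ["a", "b", "c", "d"],
    ["c", "d", "e", "f"],
    ["e", "f"],
]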
@@ -430,6 +430,32 @@ def _unwrap_generic_list(typ: type[list[T]]) -> type[T]:
    return list_type  # type: ignore[no-any-return]


+def is_generic_sequence(typ: object) -> bool:
+    "True if the specified type is a generic Sequence, i.e. `Sequence[T]`."
+    import collections.abc
+
+    typ = unwrap_annotated_type(typ)
+    return typing.get_origin(typ) is collections.abc.Sequence
+
+
+def unwrap_generic_sequence(typ: object) -> type:
+    """
+    Extracts the item type of a Sequence type.
+
+    :param typ: The Sequence type `Sequence[T]`.
+    :returns: The item type `T`.
+    """
+
+    return rewrap_annotated_type(_unwrap_generic_sequence, typ)  # type: ignore[arg-type]
+
+
+def _unwrap_generic_sequence(typ: object) -> type:
+    "Extracts the item type of a Sequence type (e.g. returns `T` for `Sequence[T]`)."
+
+    (sequence_type,) = typing.get_args(typ)  # unpack single tuple element
+    return sequence_type  # type: ignore[no-any-return]
+
+
def is_generic_set(typ: object) -> TypeGuard[type[set]]:
    "True if the specified type is a generic set, i.e. `Set[T]`."
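
The new helpers hinge on typing.get_origin; a self-contained check of the same idea, with unwrap_annotated_type approximated inline:

import collections.abc
import typing
from typing import Annotated, Sequence

def is_generic_sequence(typ: object) -> bool:
    if typing.get_origin(typ) is Annotated:  # approximate unwrap_annotated_type
        typ = typing.get_args(typ)[0]
    return typing.get_origin(typ) is collections.abc.Sequence

assert is_generic_sequence(Sequence[int])
assert is_generic_sequence(Annotated[Sequence[int], "doc"])
assert not is_generic_sequence(list[int])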
@@ -18,10 +18,12 @@ from .inspection import (
    TypeLike,
    is_generic_dict,
    is_generic_list,
+    is_generic_sequence,
    is_type_optional,
    is_type_union,
    unwrap_generic_dict,
    unwrap_generic_list,
+    unwrap_generic_sequence,
    unwrap_optional_type,
    unwrap_union_types,
)
@@ -155,24 +157,28 @@ def python_type_to_name(data_type: TypeLike, force: bool = False) -> str:
    if metadata is not None:
        # type is Annotated[T, ...]
        arg = typing.get_args(data_type)[0]
-        return python_type_to_name(arg)
+        return python_type_to_name(arg, force=force)

    if force:
        # generic types
        if is_type_optional(data_type, strict=True):
-            inner_name = python_type_to_name(unwrap_optional_type(data_type))
+            inner_name = python_type_to_name(unwrap_optional_type(data_type), force=True)
            return f"Optional__{inner_name}"
        elif is_generic_list(data_type):
-            item_name = python_type_to_name(unwrap_generic_list(data_type))
+            item_name = python_type_to_name(unwrap_generic_list(data_type), force=True)
            return f"List__{item_name}"
+        elif is_generic_sequence(data_type):
+            # Treat Sequence the same as List for schema generation purposes
+            item_name = python_type_to_name(unwrap_generic_sequence(data_type), force=True)
+            return f"List__{item_name}"
        elif is_generic_dict(data_type):
            key_type, value_type = unwrap_generic_dict(data_type)
-            key_name = python_type_to_name(key_type)
-            value_name = python_type_to_name(value_type)
+            key_name = python_type_to_name(key_type, force=True)
+            value_name = python_type_to_name(value_type, force=True)
            return f"Dict__{key_name}__{value_name}"
        elif is_type_union(data_type):
            member_types = unwrap_union_types(data_type)
-            member_names = "__".join(python_type_to_name(member_type) for member_type in member_types)
+            member_names = "__".join(python_type_to_name(member_type, force=True) for member_type in member_types)
            return f"Union__{member_names}"

    # named system or user-defined type
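
What the force=True propagation buys, sketched standalone with list and dict only (llama_stack's version also covers Optional, Sequence, and unions):

import typing

def type_name(t, force: bool = False) -> str:
    origin = typing.get_origin(t)
    if force and origin is list:
        (item,) = typing.get_args(t)
        return f"List__{type_name(item, force=True)}"  # force now recurses
    if force and origin is dict:
        key, value = typing.get_args(t)
        return f"Dict__{type_name(key, force=True)}__{type_name(value, force=True)}"
    return t.__name__

assert type_name(list[int], force=True) == "List__int"
# Before the fix the nested calls dropped force, so inner generics fell
# through to the plain-name path instead of getting synthetic names.
assert type_name(dict[str, list[int]], force=True) == "Dict__str__List__int"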
@@ -111,7 +111,7 @@ def get_class_property_docstrings(
def docstring_to_schema(data_type: type) -> Schema:
    short_description, long_description = get_class_docstrings(data_type)
    schema: Schema = {
-        "title": python_type_to_name(data_type),
+        "title": python_type_to_name(data_type, force=True),
    }

    description = "\n".join(filter(None, [short_description, long_description]))
@@ -417,6 +417,10 @@ class JsonSchemaGenerator:
        if origin_type is list:
            (list_type,) = typing.get_args(typ)  # unpack single tuple element
            return {"type": "array", "items": self.type_to_schema(list_type)}
+        elif origin_type is collections.abc.Sequence:
+            # Treat Sequence the same as list for JSON schema (both are arrays)
+            (sequence_type,) = typing.get_args(typ)  # unpack single tuple element
+            return {"type": "array", "items": self.type_to_schema(sequence_type)}
        elif origin_type is dict:
            key_type, value_type = typing.get_args(typ)
            if not (key_type is str or key_type is int or is_type_enum(key_type)):
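
The schema branch added above, restated as a runnable check: Sequence[T] and list[T] now produce the same array schema (the item schema here is hard-coded rather than recursing through type_to_schema):

import collections.abc
import typing
from typing import Sequence

def array_schema(typ) -> dict:
    origin = typing.get_origin(typ)
    if origin in (list, collections.abc.Sequence):
        (item_type,) = typing.get_args(typ)
        return {"type": "array", "items": {"type": "string"} if item_type is str else {}}
    raise TypeError(f"not an array-like type: {typ!r}")

assert array_schema(Sequence[str]) == {"type": "array", "items": {"type": "string"}}
assert array_schema(list[str]) == array_schema(Sequence[str])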
@@ -39,7 +39,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
    if provider.provider_type in (
        "inline::meta-reference",
        "inline::sentence-transformers",
        "inline::vllm",
        "remote::vllm",
        "remote::bedrock",
        "remote::databricks",
        # Technically Nvidia does support OpenAI completions, but none of their hosted models
@@ -120,7 +120,7 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
    if provider.provider_type in (
        "inline::meta-reference",
        "inline::sentence-transformers",
        "inline::vllm",
        "remote::vllm",
        "remote::bedrock",
        "remote::databricks",
        "remote::cerebras",
1766 tests/integration/responses/recordings/00cc2202e2906845aec8fe97f0e31e55abd32a289a516722ccab502c4e312c2c.json generated Normal file
File diff suppressed because it is too large
Load diff
@@ -0,0 +1,763 @@
{"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts]", "endpoint": "/v1/chat/completions", "model": "gpt-4o", "is_streaming": true}
2098 tests/integration/responses/recordings/2f5d0087ba947141d94b9ba6462c03ff01d8f4948fedd8fd84cabfa80f5f0373.json generated Normal file
File diff suppressed because it is too large
Load diff
6318 tests/integration/responses/recordings/300c5041332a0ad2990a05df88a6b6842e02157d807564c136dc71cffe2b78cc.json generated Normal file
File diff suppressed because it is too large
Load diff
@@ -0,0 +1,767 @@
{"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts_pdf]", "endpoint": "/v1/chat/completions", "model": "gpt-4o", "is_streaming": true}
|
||||
{
|
||||
"delta": {
|
||||
"content": "-",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "CUpjI7Qo17k4aeo"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-40985d2e0ff8",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "379",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "s1694CAHwowUf"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-40985d2e0ff8",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "221",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "I94vCKkpQNsx6"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-40985d2e0ff8",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "123",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "RNfAfPtJK3KHE"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-40985d2e0ff8",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "213",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "Gk04vo9RXpl3P"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-40985d2e0ff8",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "|",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "rkWPIUdNABAeP7V"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-40985d2e0ff8",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": ">.",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "GIF1vPXxInWrhl"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-40985d2e0ff8",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "Oa1imYdRme"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-40985d2e0ff8",
|
||||
"choices": [],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": {
|
||||
"completion_tokens": 23,
|
||||
"prompt_tokens": 1048,
|
||||
"total_tokens": 1071,
|
||||
"completion_tokens_details": {
|
||||
"accepted_prediction_tokens": 0,
|
||||
"audio_tokens": 0,
|
||||
"reasoning_tokens": 0,
|
||||
"rejected_prediction_tokens": 0
|
||||
},
|
||||
"prompt_tokens_details": {
|
||||
"audio_tokens": 0,
|
||||
"cached_tokens": 0
|
||||
}
|
||||
},
|
||||
"obfuscation": "0Xx3txQF13S"
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
},
|
||||
"id_normalization_mapping": {}
|
||||
}
|
||||
3008 tests/integration/responses/recordings/40a41380ede0dd4635618b64a166f89979aa6c479a626155f36045a677abe944.json generated Normal file
File diff suppressed because it is too large
@@ -0,0 +1,925 @@
{
  "test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
  "request": {
    "method": "POST",
    "url": "https://api.openai.com/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-4o",
      "messages": [
        {"role": "user", "content": "How many experts does the Llama 4 Maverick model have?"},
        {"role": "assistant", "content": "", "tool_calls": [{"index": 0, "id": "call_FzhOmTdZThRndI5rSASPdAqr", "type": "function", "function": {"name": "knowledge_search", "arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"}}]},
        {"role": "tool", "tool_call_id": "call_FzhOmTdZThRndI5rSASPdAqr", "content": [
          {"type": "text", "text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"},
          {"type": "text", "text": "[1] document_id: file-797509666839, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-797509666839', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-797509666839|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"},
          {"type": "text", "text": "END of knowledge_search tool results.\n"},
          {"type": "text", "text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"}
        ]}
      ],
      "stream": true,
      "stream_options": {"include_usage": true},
      "tools": [{"type": "function", "function": {"name": "knowledge_search", "description": "Search for information in a database.", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "The query to search for. Can be a natural language sentence or keywords."}}, "required": ["query"]}}}]
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-4o"
  },
  "response": {
    "body": [
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": "", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "Lk9Xf7hCFPS2tT"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": "The", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "14pQ6XFvX7eSh"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": " L", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "gPEg73EpAxR3FC"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": "lama", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "ZWJl6Mzcv95d"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": " ", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "zEYaSNtwtGmhfwy"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": "4", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "2tesGAvAkEOb8T6"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": " Maver", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "Hykn5kSQlG"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": "ick", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "xWW13SGjSybVX"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": " model", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "fAZjisJ63a"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": " has", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "FlTpZNfFG6rX"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": " ", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "9J9VrtXuLHug6II"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": "128", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "0EckZGr823mA9"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": " experts", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "dW7O5HFR"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": " in", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "5dRdaDvaXumkV"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": " its", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "kD1aZsGwZhMx"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": " mixture", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "IpxDJF0p"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": " of", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "WbnOG310xKaLq"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": " experts", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "sh58U2d8"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": " architecture", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "El3"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": " <", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "u3EtYZFJGaheZj"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": "|", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "QjdqqIuk8c7wMUp"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": "file", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "Zqcwf53n0hUw"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": "-", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "DfFLPM5V45QUiAm"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": "797", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "55snCUEJgoLyX"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": "509", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "pCqEKhy1wq8Vl"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": "666", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "c5QnCsKzuhFd0"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": "839", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "jFSbryUeH7ZyA"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": "|", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "uHktQBYsC92laeK"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": ">.", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "UUxHP1QGdz8MdR"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": "stop", "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "uExxZzWuXd"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-454a64d08460", "choices": [], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": {"completion_tokens": 29, "prompt_tokens": 359, "total_tokens": 388, "completion_tokens_details": {"accepted_prediction_tokens": 0, "audio_tokens": 0, "reasoning_tokens": 0, "rejected_prediction_tokens": 0}, "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0}}, "obfuscation": "EjpA6XzHVgcj8"}}
    ],
    "is_streaming": true
  },
  "id_normalization_mapping": {}
}
@@ -0,0 +1,631 @@
{
  "test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
  "request": {
    "method": "POST",
    "url": "https://api.openai.com/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-4o",
      "messages": [
        {"role": "user", "content": "How many experts does the Llama 4 Maverick model have?"},
        {"role": "assistant", "content": "", "tool_calls": [{"index": 0, "id": "call_FzhOmTdZThRndI5rSASPdAqr", "type": "function", "function": {"name": "knowledge_search", "arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"}}]},
        {"role": "tool", "tool_call_id": "call_FzhOmTdZThRndI5rSASPdAqr", "content": [
          {"type": "text", "text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"},
          {"type": "text", "text": "[1] document_id: file-797509666839, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-797509666839', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-797509666839|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"},
          {"type": "text", "text": "END of knowledge_search tool results.\n"},
          {"type": "text", "text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"}
        ]},
        {"role": "assistant", "content": "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture <|file-797509666839|>."},
        {"role": "user", "content": "Can you tell me more about the architecture?"}
      ],
      "stream": true,
      "stream_options": {"include_usage": true},
      "tools": [{"type": "function", "function": {"name": "knowledge_search", "description": "Search for information in a database.", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "The query to search for. Can be a natural language sentence or keywords."}}, "required": ["query"]}}}]
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-4o"
  },
  "response": {
    "body": [
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-4d749d8c25ad", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": "assistant", "tool_calls": [{"index": 0, "id": "call_y4Py1L2VscRQ5IBZ7gGpqpWv", "function": {"arguments": "", "name": "knowledge_search"}, "type": "function"}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "iFdF"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-4d749d8c25ad", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "{\"", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "gIC"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-4d749d8c25ad", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "query", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "P"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-4d749d8c25ad", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "\":\"", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "p"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-4d749d8c25ad", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "L", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "TAVud"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-4d749d8c25ad", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "lama", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "CX"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-4d749d8c25ad", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": " ", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "hHmE5"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-4d749d8c25ad", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "4", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "CN4uS"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-4d749d8c25ad", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": " Maver", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": ""}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-4d749d8c25ad", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "ick", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "0kI"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-4d749d8c25ad", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": " model", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": ""}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-4d749d8c25ad", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": " architecture", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "dyryTBF49"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-4d749d8c25ad", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "\"}", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "BHV"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-4d749d8c25ad", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": "tool_calls", "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "qrKh"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-4d749d8c25ad", "choices": [], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": {"completion_tokens": 22, "prompt_tokens": 404, "total_tokens": 426, "completion_tokens_details": {"accepted_prediction_tokens": 0, "audio_tokens": 0, "reasoning_tokens": 0, "rejected_prediction_tokens": 0}, "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0}}, "obfuscation": "ecpBTD3qjc75r"}}
    ],
    "is_streaming": true
  },
  "id_normalization_mapping": {}
}
2144 tests/integration/responses/recordings/5a3033c4d989d68cc418014d7b8ed7bbb5d6e538bd3620dec2f846e0c8fa52f8.json generated Normal file
File diff suppressed because it is too large
1018 tests/integration/responses/recordings/6d20aac5318b8bf5803c05c224e7ca6d5b5951df5408e6bca3d0ba2b963f2c73.json generated Normal file
File diff suppressed because it is too large
5211 tests/integration/responses/recordings/6e5759a3bd65f94c5ec325ee211fcae819b51d6877edc656548d863bd9b5652e.json generated Normal file
File diff suppressed because it is too large
1118 tests/integration/responses/recordings/82038830a1ad60e4e01fb5efafd760b6327f0b7e6e7fa4e80518bff9f6002e8f.json generated Normal file
File diff suppressed because it is too large
1942 tests/integration/responses/recordings/882e7f0e5fcfe9f3276692c344dc2fee082b189494dd4f4829825adc90a79d9c.json generated Normal file
File diff suppressed because it is too large
@@ -0,0 +1,763 @@
{
  "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts]",
  "request": {
    "method": "POST",
    "url": "https://api.openai.com/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-4o",
      "messages": [
        {"role": "user", "content": "How many experts does the Llama 4 Maverick model have?"},
        {"role": "assistant", "content": "", "tool_calls": [{"index": 0, "id": "call_gZXRKN1HMDC16NP9wNPAkP9K", "type": "function", "function": {"name": "knowledge_search", "arguments": "{\"query\":\"Llama 4 Maverick model experts count\"}"}}]},
        {"role": "tool", "tool_call_id": "call_gZXRKN1HMDC16NP9wNPAkP9K", "content": [
          {"type": "text", "text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"},
          {"type": "text", "text": "[1] document_id: file-864460993305, score: 0.011418752464355166, attributes: {'filename': 'test_response_non_streaming_file_search.txt', 'chunk_id': '869ae0c0-ab85-ca6f-e5d0-024381443c27', 'document_id': 'file-864460993305', 'token_count': 10.0, 'metadata_token_count': 13.0} (cite as <|file-864460993305|>)\nLlama 4 Maverick has 128 experts\n"},
          {"type": "text", "text": "END of knowledge_search tool results.\n"},
          {"type": "text", "text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model experts count\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"}
        ]}
      ],
      "stream": true,
      "stream_options": {"include_usage": true},
      "tools": [{"type": "function", "function": {"name": "knowledge_search", "description": "Search for information in a database.", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "The query to search for. Can be a natural language sentence or keywords."}}, "required": ["query"]}}}]
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-4o"
  },
  "response": {
    "body": [
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-baa0ba98b7f3", "choices": [{"delta": {"content": "", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "VvS2zeV5Z8apdX"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-baa0ba98b7f3", "choices": [{"delta": {"content": "The", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "NeElmbFuPxg9F"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-baa0ba98b7f3", "choices": [{"delta": {"content": " L", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "RA2Dv6fH3Xp28d"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-baa0ba98b7f3", "choices": [{"delta": {"content": "lama", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "mk2wpBSl9esL"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-baa0ba98b7f3", "choices": [{"delta": {"content": " ", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "WkghQrNy7WNFz7S"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-baa0ba98b7f3", "choices": [{"delta": {"content": "4", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "LOo1ya1Av8yejuX"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-baa0ba98b7f3", "choices": [{"delta": {"content": " Maver", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "Uj02OVTEBb"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-baa0ba98b7f3", "choices": [{"delta": {"content": "ick", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "7s3FiwwwgzGhy"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-baa0ba98b7f3", "choices": [{"delta": {"content": " model", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "WExrPT6Yjd"}},
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " has",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "vbf0YwoBbJsB"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " ",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "vYIgV2n0AuxwZ9F"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "128",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "lAS4gXrK4sNoq"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " experts",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "90lGUcaB"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " <",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "mnFZfKgXWsjWZe"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "|",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "eOcwjhvK0vIp2nj"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "file",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "5TijFZHKoeGs"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "-",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "MWGjx7wiu4tdFha"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "864",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "k9VH32AhyY519"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "460",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "dWxZtp4i8KhxZ"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "993",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "u2WHjDkGJE2hg"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "305",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "6fckZytfB9iS5"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "|",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "YGOP75uha3KyHao"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": ">.",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "emmym2mGHhvw9Q"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "GoEMFfNFBW"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-baa0ba98b7f3",
|
||||
"choices": [],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": {
|
||||
"completion_tokens": 23,
|
||||
"prompt_tokens": 350,
|
||||
"total_tokens": 373,
|
||||
"completion_tokens_details": {
|
||||
"accepted_prediction_tokens": 0,
|
||||
"audio_tokens": 0,
|
||||
"reasoning_tokens": 0,
|
||||
"rejected_prediction_tokens": 0
|
||||
},
|
||||
"prompt_tokens_details": {
|
||||
"audio_tokens": 0,
|
||||
"cached_tokens": 0
|
||||
}
|
||||
},
|
||||
"obfuscation": "ec6S325i8izl1"
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
},
|
||||
"id_normalization_mapping": {}
|
||||
}
|
||||
1276
tests/integration/responses/recordings/bb43ffac0034ef7fcca1786bcb53106b37f70f053c38a92e225f4107e48c9c72.json
generated
Normal file
File diff suppressed because it is too large
1901
tests/integration/responses/recordings/bb8ad4fa0847c0b408d8bfeb6cc6bc65d4afece55df8e8187dfdbf75d57b13ba.json
generated
Normal file
File diff suppressed because it is too large
@@ -0,0 +1,631 @@
{
  "test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
  "request": {
    "method": "POST",
    "url": "https://api.openai.com/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-4o",
      "messages": [
        { "role": "user", "content": "How many experts does the Llama 4 Maverick model have?" },
        { "role": "assistant", "content": "", "tool_calls": [ { "index": 0, "id": "call_4ac6gxccWFxDvEl8BizY3BJw", "type": "function", "function": { "name": "knowledge_search", "arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}" } } ] },
        { "role": "tool", "tool_call_id": "call_4ac6gxccWFxDvEl8BizY3BJw", "content": [ { "type": "text", "text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n" }, { "type": "text", "text": "[1] document_id: file-528246887823, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-528246887823', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-528246887823|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n" }, { "type": "text", "text": "END of knowledge_search tool results.\n" }, { "type": "text", "text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n" } ] },
        { "role": "assistant", "content": "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture <|file-528246887823|>." },
        { "role": "user", "content": "Can you tell me more about the architecture?" }
      ],
      "stream": true,
      "stream_options": { "include_usage": true },
      "tools": [
        { "type": "function", "function": { "name": "knowledge_search", "description": "Search for information in a database.", "parameters": { "type": "object", "properties": { "query": { "type": "string", "description": "The query to search for. Can be a natural language sentence or keywords." } }, "required": [ "query" ] } } }
      ]
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-4o"
  },
  "response": {
    "body": [
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-c0b147807a41", "choices": [ { "delta": { "content": null, "function_call": null, "refusal": null, "role": "assistant", "tool_calls": [ { "index": 0, "id": "call_2dn6pQIic4tAhxL0Q3R9v9oy", "function": { "arguments": "", "name": "knowledge_search" }, "type": "function" } ] }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "U5u2" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-c0b147807a41", "choices": [ { "delta": { "content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [ { "index": 0, "id": null, "function": { "arguments": "{\"", "name": null }, "type": null } ] }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "rC6" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-c0b147807a41", "choices": [ { "delta": { "content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [ { "index": 0, "id": null, "function": { "arguments": "query", "name": null }, "type": null } ] }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "4" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-c0b147807a41", "choices": [ { "delta": { "content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [ { "index": 0, "id": null, "function": { "arguments": "\":\"", "name": null }, "type": null } ] }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "E" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-c0b147807a41", "choices": [ { "delta": { "content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [ { "index": 0, "id": null, "function": { "arguments": "L", "name": null }, "type": null } ] }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "U1RKZ" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-c0b147807a41", "choices": [ { "delta": { "content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [ { "index": 0, "id": null, "function": { "arguments": "lama", "name": null }, "type": null } ] }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "N9" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-c0b147807a41", "choices": [ { "delta": { "content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [ { "index": 0, "id": null, "function": { "arguments": " ", "name": null }, "type": null } ] }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "eCM84" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-c0b147807a41", "choices": [ { "delta": { "content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [ { "index": 0, "id": null, "function": { "arguments": "4", "name": null }, "type": null } ] }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "RNtZo" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-c0b147807a41", "choices": [ { "delta": { "content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [ { "index": 0, "id": null, "function": { "arguments": " Maver", "name": null }, "type": null } ] }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-c0b147807a41", "choices": [ { "delta": { "content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [ { "index": 0, "id": null, "function": { "arguments": "ick", "name": null }, "type": null } ] }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "OmQ" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-c0b147807a41", "choices": [ { "delta": { "content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [ { "index": 0, "id": null, "function": { "arguments": " model", "name": null }, "type": null } ] }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-c0b147807a41", "choices": [ { "delta": { "content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [ { "index": 0, "id": null, "function": { "arguments": " architecture", "name": null }, "type": null } ] }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "Hd8hPZl2u" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-c0b147807a41", "choices": [ { "delta": { "content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [ { "index": 0, "id": null, "function": { "arguments": "\"}", "name": null }, "type": null } ] }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "5bs" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-c0b147807a41", "choices": [ { "delta": { "content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": "tool_calls", "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "eMIj" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-c0b147807a41", "choices": [], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": { "completion_tokens": 22, "prompt_tokens": 404, "total_tokens": 426, "completion_tokens_details": { "accepted_prediction_tokens": 0, "audio_tokens": 0, "reasoning_tokens": 0, "rejected_prediction_tokens": 0 }, "prompt_tokens_details": { "audio_tokens": 0, "cached_tokens": 0 } }, "obfuscation": "ofat2LchRvz8V" } }
    ],
    "is_streaming": true
  },
  "id_normalization_mapping": {}
}
@@ -0,0 +1,925 @@
{
  "test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
  "request": {
    "method": "POST",
    "url": "https://api.openai.com/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-4o",
      "messages": [
        { "role": "user", "content": "How many experts does the Llama 4 Maverick model have?" },
        { "role": "assistant", "content": "", "tool_calls": [ { "index": 0, "id": "call_4ac6gxccWFxDvEl8BizY3BJw", "type": "function", "function": { "name": "knowledge_search", "arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}" } } ] },
        { "role": "tool", "tool_call_id": "call_4ac6gxccWFxDvEl8BizY3BJw", "content": [ { "type": "text", "text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n" }, { "type": "text", "text": "[1] document_id: file-528246887823, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-528246887823', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-528246887823|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n" }, { "type": "text", "text": "END of knowledge_search tool results.\n" }, { "type": "text", "text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n" } ] }
      ],
      "stream": true,
      "stream_options": { "include_usage": true },
      "tools": [
        { "type": "function", "function": { "name": "knowledge_search", "description": "Search for information in a database.", "parameters": { "type": "object", "properties": { "query": { "type": "string", "description": "The query to search for. Can be a natural language sentence or keywords." } }, "required": [ "query" ] } } }
      ]
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-4o"
  },
  "response": {
    "body": [
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": "", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "DzrEfuLOuw4cnb" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": "The", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "CsVsWYnTMLfCu" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": " L", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "45hLla9Dhdu3x9" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": "lama", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "AhCUnf7tqKqC" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": " ", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "gvAEwnHAgMzITVb" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": "4", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "mGUFWICkd1S0jlx" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": " Maver", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "e85JCyNVPe" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": "ick", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "5vQf0h4IJTGGt" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": " model", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "anovsNqaSC" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": " has", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "fS6GYg8pBO8Q" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": " ", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "vO7onsnvWf5kjUI" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": "128", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "pdFjXciA0pN5w" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": " experts", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "eMMaKcAW" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": " in", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "JFDRUy7B9ktO0" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": " its", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "QlQIiohVPMVQ" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": " mixture", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "UuR2QmMR" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": " of", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "t0uvHdtkB4Fsl" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": " experts", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "3G1KX2gw" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": " architecture", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "x2J" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": " <", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "fbLYZDlS7xvywf" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": "|", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "vAxoGpf245DPeM8" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": "file", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "gLu1ZShAlH4C" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": "-", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "PdMvc8X2LtbhyFU" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": "528", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "0S00nwBZD0Cah" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": "246", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "fa7s8AYzHjMph" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": "887", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "hrwMBgH8bsKYT" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": "823", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "NBJ8yJWJjBCCQ" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": "|", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "AAzbONdy9ExzSBR" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": ">.", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "THiCsk4cqjABWJ" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [ { "delta": { "content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": "stop", "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "rzm64SnHTE" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-cf185c868634", "choices": [], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": { "completion_tokens": 29, "prompt_tokens": 359, "total_tokens": 388, "completion_tokens_details": { "accepted_prediction_tokens": 0, "audio_tokens": 0, "reasoning_tokens": 0, "rejected_prediction_tokens": 0 }, "prompt_tokens_details": { "audio_tokens": 0, "cached_tokens": 0 } }, "obfuscation": "AnUv1BxAB2uOY" } }
    ],
    "is_streaming": true
  },
  "id_normalization_mapping": {}
}
@@ -0,0 +1,952 @@
{
  "test_id": "tests/integration/responses/test_file_search.py::test_response_file_search_filter_compound_and[client_with_models-txt=openai/gpt-4o]",
  "request": {
    "method": "POST",
    "url": "https://api.openai.com/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-4o",
      "messages": [
        { "role": "user", "content": "What are the engineering updates from the US?" },
        { "role": "assistant", "content": "", "tool_calls": [ { "index": 0, "id": "call_rST37XuKuJQcEBfmoTnNQzNe", "type": "function", "function": { "name": "knowledge_search", "arguments": "{\"query\":\"engineering updates from the US\"}" } } ] },
        { "role": "tool", "tool_call_id": "call_rST37XuKuJQcEBfmoTnNQzNe", "content": [ { "type": "text", "text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n" }, { "type": "text", "text": "[1] document_id: file-710505118847, score: 0.005345607610573921, attributes: {'region': 'us', 'category': 'engineering', 'date': 1680307200.0, 'filename': 'us_engineering_q2.txt', 'chunk_id': '084e15ad-480a-eae8-9242-391c53854867', 'document_id': 'file-710505118847', 'token_count': 18.0, 'metadata_token_count': 32.0} (cite as <|file-710505118847|>)\nUS technical updates for Q2 2023. New features deployed in the US region.\n" }, { "type": "text", "text": "END of knowledge_search tool results.\n" }, { "type": "text", "text": "The above results were retrieved to help answer the user's query: \"engineering updates from the US\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n" } ] }
      ],
      "stream": true,
      "stream_options": { "include_usage": true },
      "tools": [
        { "type": "function", "function": { "name": "knowledge_search", "description": "Search for information in a database.", "parameters": { "type": "object", "properties": { "query": { "type": "string", "description": "The query to search for. Can be a natural language sentence or keywords." } }, "required": [ "query" ] } } }
      ]
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-4o"
  },
  "response": {
    "body": [
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-d6f74a7dd25a", "choices": [ { "delta": { "content": "", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "CVT4TMzBPNlTqA" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-d6f74a7dd25a", "choices": [ { "delta": { "content": "The", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "Rlj8tcP3E7bOB" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-d6f74a7dd25a", "choices": [ { "delta": { "content": " engineering", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "8lga" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { "id": "rec-d6f74a7dd25a", "choices": [ { "delta": { "content": " updates", "function_call": null, "refusal": null, "role": null, "tool_calls": null }, "finish_reason": null, "index": 0, "logprobs": null } ], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_a788c5aef0", "usage": null, "obfuscation": "6fwO0WkR" } },
      { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " from",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "BryajibrQvv"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " the",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "iTlMgikEguMP"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " US",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "79xbcCa6na7en"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " include",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "q7q4AkjT"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " new",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "fiyvaDyv5eet"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " features",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "cBkhZfR"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " deployed",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "EaW5Ixt"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " in",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "xLVfGMTiR4OMS"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " the",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "cncqZQApoIjH"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " region",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "yiSqVtnqF"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " for",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "sbDWGbV8OoYi"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " Q",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "E1ZJCGd5c2IH7b"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "2",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "agHXieAbH98A2VE"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " ",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "Ht3DkQwQs7t32Aw"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "202",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "j4r88Vvqcm7VY"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "3",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "pv9GLKOSpa0BHEr"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " <",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "iBXT8JWz9X1J1q"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "|",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "D1gi2w0f0DN5n3k"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "file",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "zxHM3I5wmPGU"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "-",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "Gl7oL62eU6xIrUp"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "710",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "l4RX4sx1BfQA6"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "505",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "AGyEWqU2sDL6e"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "118",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "BReQxn8kTEiA5"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "847",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "yN9PEtunpAkNv"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "|",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "bKBLmRBkxlk61fP"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": ">.",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "077BDwQit7hWfz"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": null,
|
||||
"obfuscation": "LOYztD3Yfb"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-d6f74a7dd25a",
|
||||
"choices": [],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_a788c5aef0",
|
||||
"usage": {
|
||||
"completion_tokens": 30,
|
||||
"prompt_tokens": 364,
|
||||
"total_tokens": 394,
|
||||
"completion_tokens_details": {
|
||||
"accepted_prediction_tokens": 0,
|
||||
"audio_tokens": 0,
|
||||
"reasoning_tokens": 0,
|
||||
"rejected_prediction_tokens": 0
|
||||
},
|
||||
"prompt_tokens_details": {
|
||||
"audio_tokens": 0,
|
||||
"cached_tokens": 0
|
||||
}
|
||||
},
|
||||
"obfuscation": "9lHtlsx9YsVH6"
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
},
|
||||
"id_normalization_mapping": {}
|
||||
}
1456  tests/integration/responses/recordings/dd67347dee58190dea53588f8914211d279b80b6198cbe8b8b789fad2a0d0687.json  generated  Normal file  (file diff suppressed because it is too large)
2549  tests/integration/responses/recordings/f4cfc578243d8c3e2e61488bfcfc571cbc160a97dc570076a869d4fec1dc8c52.json  generated  Normal file  (file diff suppressed because it is too large)
2279  tests/integration/responses/recordings/fa055fef7ea5386adaeaa5ddea61a417c161c49f64b9d92de0b96f4a892bc83c.json  generated  Normal file  (file diff suppressed because it is too large)
@@ -82,23 +82,37 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode
 
 @pytest.fixture(scope="session")
 def sample_chunks():
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
+    chunks_data = [
+        (
+            "Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
+            "doc1",
+            "programming",
+        ),
+        (
+            "Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
+            "doc2",
+            "ai",
+        ),
+        (
+            "Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
+            "doc3",
+            "computer_science",
+        ),
+        (
+            "Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
+            "doc4",
+            "ai",
+        ),
+    ]
     return [
         Chunk(
-            content="Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
-            metadata={"document_id": "doc1", "topic": "programming"},
-        ),
-        Chunk(
-            content="Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
-            metadata={"document_id": "doc2", "topic": "ai"},
-        ),
-        Chunk(
-            content="Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
-            metadata={"document_id": "doc3", "topic": "computer_science"},
-        ),
-        Chunk(
-            content="Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
-            metadata={"document_id": "doc4", "topic": "ai"},
-        ),
+            content=content,
+            chunk_id=generate_chunk_id(doc_id, content),
+            metadata={"document_id": doc_id, "topic": topic},
+        )
+        for content, doc_id, topic in chunks_data
     ]
@@ -13,23 +13,33 @@ from ..conftest import vector_provider_wrapper
 
 @pytest.fixture(scope="session")
 def sample_chunks():
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
+    chunks_data = [
+        (
+            "Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
+            "doc1",
+        ),
+        (
+            "Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
+            "doc2",
+        ),
+        (
+            "Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
+            "doc3",
+        ),
+        (
+            "Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
+            "doc4",
+        ),
+    ]
     return [
         Chunk(
-            content="Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
-            metadata={"document_id": "doc1"},
-        ),
-        Chunk(
-            content="Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
-            metadata={"document_id": "doc2"},
-        ),
-        Chunk(
-            content="Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
-            metadata={"document_id": "doc3"},
-        ),
-        Chunk(
-            content="Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
-            metadata={"document_id": "doc4"},
-        ),
+            content=content,
+            chunk_id=generate_chunk_id(doc_id, content),
+            metadata={"document_id": doc_id},
+        )
+        for content, doc_id in chunks_data
     ]
@@ -168,6 +178,7 @@ def test_insert_chunks_with_precomputed_embeddings(
     chunks_with_embeddings = [
         Chunk(
             content="This is a test chunk with precomputed embedding.",
+            chunk_id="chunk1",
            metadata={"document_id": "doc1", "source": "precomputed", "chunk_id": "chunk1"},
             embedding=[0.1] * int(embedding_dimension),
         ),
@@ -215,9 +226,12 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
 
     actual_vector_store_id = register_response.id
 
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
     chunks_with_embeddings = [
         Chunk(
             content="duplicate",
+            chunk_id=generate_chunk_id("doc1", "duplicate"),
             metadata={"document_id": "doc1", "source": "precomputed"},
             embedding=[0.1] * int(embedding_dimension),
         ),
@@ -192,18 +192,18 @@ async def test_create_agent_session_persistence(agents_impl, sample_agent_config
     assert session_response.session_id is not None
 
     # Verify the session was stored
-    session = await agents_impl.get_agents_session(agent_id, session_response.session_id)
+    session = await agents_impl.get_agents_session(session_response.session_id, agent_id)
     assert session.session_name == "test_session"
     assert session.session_id == session_response.session_id
     assert session.started_at is not None
     assert session.turns == []
 
     # Delete the session
-    await agents_impl.delete_agents_session(agent_id, session_response.session_id)
+    await agents_impl.delete_agents_session(session_response.session_id, agent_id)
 
     # Verify the session was deleted
     with pytest.raises(ValueError):
-        await agents_impl.get_agents_session(agent_id, session_response.session_id)
+        await agents_impl.get_agents_session(session_response.session_id, agent_id)
 
 
 @pytest.mark.parametrize("enable_session_persistence", [True, False])
@@ -226,11 +226,11 @@ async def test_list_agent_sessions_persistence(agents_impl, sample_agent_config,
     assert session2.session_id in session_ids
 
     # Delete one session
-    await agents_impl.delete_agents_session(agent_id, session1.session_id)
+    await agents_impl.delete_agents_session(session1.session_id, agent_id)
 
     # Verify the session was deleted
     with pytest.raises(ValueError):
-        await agents_impl.get_agents_session(agent_id, session1.session_id)
+        await agents_impl.get_agents_session(session1.session_id, agent_id)
 
     # List sessions again
     sessions = await agents_impl.list_agent_sessions(agent_id)
@@ -43,9 +43,15 @@ def embedding_dimension() -> int:
 @pytest.fixture(scope="session")
 def sample_chunks():
     """Generates chunks that force multiple batches for a single document to expose ID conflicts."""
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
     n, k = 10, 3
     sample = [
-        Chunk(content=f"Sentence {i} from document {j}", metadata={"document_id": f"document-{j}"})
+        Chunk(
+            content=f"Sentence {i} from document {j}",
+            chunk_id=generate_chunk_id(f"document-{j}", f"Sentence {i} from document {j}"),
+            metadata={"document_id": f"document-{j}"},
+        )
         for j in range(k)
         for i in range(n)
     ]
@@ -53,6 +59,7 @@ def sample_chunks():
         [
             Chunk(
                 content=f"Sentence {i} from document {j + k}",
+                chunk_id=f"document-{j}-chunk-{i}",
                 chunk_metadata=ChunkMetadata(
                     document_id=f"document-{j + k}",
                     chunk_id=f"document-{j}-chunk-{i}",
@@ -73,6 +80,7 @@ def sample_chunks_with_metadata():
     sample = [
         Chunk(
             content=f"Sentence {i} from document {j}",
+            chunk_id=f"document-{j}-chunk-{i}",
             metadata={"document_id": f"document-{j}"},
             chunk_metadata=ChunkMetadata(
                 document_id=f"document-{j}",
@@ -49,9 +49,21 @@ def vector_store_id():
 
 @pytest.fixture
 def sample_chunks():
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
     return [
-        Chunk(content="MOCK text content 1", mime_type="text/plain", metadata={"document_id": "mock-doc-1"}),
-        Chunk(content="MOCK text content 1", mime_type="text/plain", metadata={"document_id": "mock-doc-2"}),
+        Chunk(
+            content="MOCK text content 1",
+            chunk_id=generate_chunk_id("mock-doc-1", "MOCK text content 1"),
+            mime_type="text/plain",
+            metadata={"document_id": "mock-doc-1"},
+        ),
+        Chunk(
+            content="MOCK text content 1",
+            chunk_id=generate_chunk_id("mock-doc-2", "MOCK text content 1"),
+            mime_type="text/plain",
+            metadata={"document_id": "mock-doc-2"},
+        ),
     ]
@@ -434,9 +434,15 @@ async def test_query_chunks_hybrid_tie_breaking(
     sqlite_vec_index, sample_embeddings, embedding_dimension, tmp_path_factory
 ):
     """Test tie-breaking and determinism when scores are equal."""
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
     # Create two chunks with the same content and embedding
-    chunk1 = Chunk(content="identical", metadata={"document_id": "docA"})
-    chunk2 = Chunk(content="identical", metadata={"document_id": "docB"})
+    chunk1 = Chunk(
+        content="identical", chunk_id=generate_chunk_id("docA", "identical"), metadata={"document_id": "docA"}
+    )
+    chunk2 = Chunk(
+        content="identical", chunk_id=generate_chunk_id("docB", "identical"), metadata={"document_id": "docB"}
+    )
     chunks = [chunk1, chunk2]
     # Use the same embedding for both chunks to ensure equal scores
     same_embedding = sample_embeddings[0]
@@ -135,10 +135,24 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
     vector_io_adapter.cache["db1"] = fake_index
 
     # Various document_id scenarios that shouldn't crash
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
     chunks = [
-        Chunk(content="has doc_id in metadata", metadata={"document_id": "doc-1"}),
-        Chunk(content="no doc_id anywhere", metadata={"source": "test"}),
-        Chunk(content="doc_id in chunk_metadata", chunk_metadata=ChunkMetadata(document_id="doc-3")),
+        Chunk(
+            content="has doc_id in metadata",
+            chunk_id=generate_chunk_id("doc-1", "has doc_id in metadata"),
+            metadata={"document_id": "doc-1"},
+        ),
+        Chunk(
+            content="no doc_id anywhere",
+            chunk_id=generate_chunk_id("unknown", "no doc_id anywhere"),
+            metadata={"source": "test"},
+        ),
+        Chunk(
+            content="doc_id in chunk_metadata",
+            chunk_id=generate_chunk_id("doc-3", "doc_id in chunk_metadata"),
+            chunk_metadata=ChunkMetadata(document_id="doc-3"),
+        ),
     ]
 
     # Should work without KeyError
@@ -151,7 +165,9 @@ async def test_document_id_with_invalid_type_raises_error():
     from llama_stack.apis.vector_io import Chunk
 
     # Integer document_id should raise TypeError
-    chunk = Chunk(content="test", metadata={"document_id": 12345})
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
+    chunk = Chunk(content="test", chunk_id=generate_chunk_id("test", "test"), metadata={"document_id": 12345})
     with pytest.raises(TypeError) as exc_info:
         _ = chunk.document_id
     assert "metadata['document_id'] must be a string" in str(exc_info.value)
@@ -159,7 +175,9 @@ async def test_document_id_with_invalid_type_raises_error():
 
 
 async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter):
-    expected = QueryChunksResponse(chunks=[Chunk(content="c1")], scores=[0.1])
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
+    expected = QueryChunksResponse(chunks=[Chunk(content="c1", chunk_id=generate_chunk_id("test", "c1"))], scores=[0.1])
     fake_index = AsyncMock(query_chunks=AsyncMock(return_value=expected))
     vector_io_adapter.cache["db1"] = fake_index
 
@@ -18,13 +18,12 @@ from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 
 
 def test_generate_chunk_id():
-    chunks = [
-        Chunk(content="test", metadata={"document_id": "doc-1"}),
-        Chunk(content="test ", metadata={"document_id": "doc-1"}),
-        Chunk(content="test 3", metadata={"document_id": "doc-1"}),
-    ]
+    """Test that generate_chunk_id produces expected hashes."""
+    chunk_id1 = generate_chunk_id("doc-1", "test")
+    chunk_id2 = generate_chunk_id("doc-1", "test ")
+    chunk_id3 = generate_chunk_id("doc-1", "test 3")
 
-    chunk_ids = sorted([chunk.chunk_id for chunk in chunks])
+    chunk_ids = sorted([chunk_id1, chunk_id2, chunk_id3])
     assert chunk_ids == [
         "31d1f9a3-c8d2-66e7-3c37-af2acd329778",
         "d07dade7-29c0-cda7-df29-0249a1dcbc3e",
@@ -33,42 +32,49 @@ def test_generate_chunk_id():
 
 
 def test_generate_chunk_id_with_window():
-    chunk = Chunk(content="test", metadata={"document_id": "doc-1"})
+    """Test that generate_chunk_id with chunk_window produces different IDs."""
+    # Create a chunk object to match the original test behavior (passing object to generate_chunk_id)
+    chunk = Chunk(content="test", chunk_id="placeholder", metadata={"document_id": "doc-1"})
     chunk_id1 = generate_chunk_id("doc-1", chunk, chunk_window="0-1")
     chunk_id2 = generate_chunk_id("doc-1", chunk, chunk_window="1-2")
-    assert chunk_id1 == "8630321a-d9cb-2bb6-cd28-ebf68dafd866"
-    assert chunk_id2 == "13a1c09a-cbda-b61a-2d1a-7baa90888685"
+    # Verify that different windows produce different IDs
+    assert chunk_id1 != chunk_id2
+    assert len(chunk_id1) == 36  # Valid UUID format
+    assert len(chunk_id2) == 36  # Valid UUID format
 
 
-def test_chunk_id():
-    # Test with existing chunk ID
-    chunk_with_id = Chunk(content="test", metadata={"document_id": "existing-id"})
-    assert chunk_with_id.chunk_id == "11704f92-42b6-61df-bf85-6473e7708fbd"
-
-    # Test with document ID in metadata
-    chunk_with_doc_id = Chunk(content="test", metadata={"document_id": "doc-1"})
-    assert chunk_with_doc_id.chunk_id == generate_chunk_id("doc-1", "test")
-
-    # Test chunks with ChunkMetadata
-    chunk_with_metadata = Chunk(
+def test_chunk_creation_with_explicit_id():
+    """Test that chunks can be created with explicit chunk_id."""
+    chunk_id = generate_chunk_id("doc-1", "test")
+    chunk = Chunk(
         content="test",
-        metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"},
+        chunk_id=chunk_id,
+        metadata={"document_id": "doc-1"},
     )
+    assert chunk.chunk_id == chunk_id
+    assert chunk.chunk_id == "31d1f9a3-c8d2-66e7-3c37-af2acd329778"
+
+
+def test_chunk_with_metadata():
+    """Test chunks with ChunkMetadata."""
+    chunk_id = "chunk-id-1"
+    chunk = Chunk(
+        content="test",
+        chunk_id=chunk_id,
+        metadata={"document_id": "existing-id"},
         chunk_metadata=ChunkMetadata(document_id="document_1"),
     )
-    assert chunk_with_metadata.chunk_id == "chunk-id-1"
-
-    # Test with no ID or document ID
-    chunk_without_id = Chunk(content="test")
-    generated_id = chunk_without_id.chunk_id
-    assert isinstance(generated_id, str) and len(generated_id) == 36  # Should be a valid UUID
+    assert chunk.chunk_id == "chunk-id-1"
+    assert chunk.document_id == "existing-id"  # metadata takes precedence
 
 
-def test_stored_chunk_id_alias():
-    # Test with existing chunk ID alias
-    chunk_with_alias = Chunk(content="test", metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"})
-    assert chunk_with_alias.chunk_id == "chunk-id-1"
-    serialized_chunk = chunk_with_alias.model_dump()
-    assert serialized_chunk["stored_chunk_id"] == "chunk-id-1"
-    # showing chunk_id is not serialized (i.e., a computed field)
-    assert "chunk_id" not in serialized_chunk
-    assert chunk_with_alias.stored_chunk_id == "chunk-id-1"
+def test_chunk_serialization():
+    """Test that chunk_id is properly serialized."""
+    chunk = Chunk(
+        content="test",
+        chunk_id="test-chunk-id",
+        metadata={"document_id": "doc-1"},
+    )
+    serialized_chunk = chunk.model_dump()
+    assert serialized_chunk["chunk_id"] == "test-chunk-id"
+    assert "chunk_id" in serialized_chunk
@@ -41,6 +41,7 @@ class TestRagQuery:
         interleaved_content = MagicMock()
         chunk = Chunk(
             content=interleaved_content,
+            chunk_id="chunk1",
             metadata={
                 "key1": "value1",
                 "token_count": 10,
@@ -48,7 +49,6 @@ class TestRagQuery:
                 # Note this is inserted into `metadata` during MemoryToolRuntimeImpl().insert()
                 "document_id": "doc1",
             },
-            stored_chunk_id="chunk1",
             chunk_metadata=chunk_metadata,
         )
 
@@ -101,8 +101,8 @@ class TestRagQuery:
         )
         chunk1 = Chunk(
             content="chunk from db1",
+            chunk_id="c1",
             metadata={"vector_store_id": "db1", "document_id": "doc1"},
-            stored_chunk_id="c1",
             chunk_metadata=chunk_metadata1,
         )
 
@@ -114,8 +114,8 @@ class TestRagQuery:
         )
         chunk2 = Chunk(
             content="chunk from db2",
+            chunk_id="c2",
             metadata={"vector_store_id": "db2", "document_id": "doc2"},
-            stored_chunk_id="c2",
             chunk_metadata=chunk_metadata2,
         )
 
@@ -26,6 +26,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     content_from_doc,
     make_overlapped_chunks,
 )
+from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 
 DUMMY_PDF_PATH = Path(os.path.abspath(__file__)).parent / "fixtures" / "dummy.pdf"
 # Depending on the machine, this can get parsed a couple of ways
@@ -53,6 +54,7 @@ class TestChunk:
     def test_chunk(self):
         chunk = Chunk(
             content="Example chunk content",
+            chunk_id=generate_chunk_id("test-doc", "Example chunk content"),
             metadata={"key": "value"},
             embedding=[0.1, 0.2, 0.3],
         )
@@ -63,6 +65,7 @@ class TestChunk:
 
         chunk_no_embedding = Chunk(
             content="Example chunk content",
+            chunk_id=generate_chunk_id("test-doc", "Example chunk content"),
             metadata={"key": "value"},
         )
         assert chunk_no_embedding.embedding is None
@@ -218,8 +221,8 @@ class TestVectorStoreWithIndex:
         )
 
         chunks = [
-            Chunk(content="Test 1", embedding=None, metadata={}),
-            Chunk(content="Test 2", embedding=None, metadata={}),
+            Chunk(content="Test 1", chunk_id=generate_chunk_id("test-doc", "Test 1"), embedding=None, metadata={}),
+            Chunk(content="Test 2", chunk_id=generate_chunk_id("test-doc", "Test 2"), embedding=None, metadata={}),
         ]
 
         mock_inference_api.openai_embeddings.return_value.data = [
@@ -254,8 +257,18 @@ class TestVectorStoreWithIndex:
         )
 
         chunks = [
-            Chunk(content="Test 1", embedding=[0.1, 0.2, 0.3], metadata={}),
-            Chunk(content="Test 2", embedding=[0.4, 0.5, 0.6], metadata={}),
+            Chunk(
+                content="Test 1",
+                chunk_id=generate_chunk_id("test-doc", "Test 1"),
+                embedding=[0.1, 0.2, 0.3],
+                metadata={},
+            ),
+            Chunk(
+                content="Test 2",
+                chunk_id=generate_chunk_id("test-doc", "Test 2"),
+                embedding=[0.4, 0.5, 0.6],
+                metadata={},
+            ),
         ]
 
         await vector_store_with_index.insert_chunks(chunks)
@@ -279,25 +292,47 @@ class TestVectorStoreWithIndex:
 
         # Verify Chunk raises ValueError for invalid embedding type
         with pytest.raises(ValueError, match="Input should be a valid list"):
-            Chunk(content="Test 1", embedding="invalid_type", metadata={})
+            Chunk(
+                content="Test 1",
+                chunk_id=generate_chunk_id("test-doc", "Test 1"),
+                embedding="invalid_type",
+                metadata={},
+            )
 
         # Verify Chunk raises ValueError for invalid embedding type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
         with pytest.raises(ValueError, match="Input should be a valid list"):
             await vector_store_with_index.insert_chunks(
                 [
-                    Chunk(content="Test 1", embedding=None, metadata={}),
-                    Chunk(content="Test 2", embedding="invalid_type", metadata={}),
+                    Chunk(
+                        content="Test 1", chunk_id=generate_chunk_id("test-doc", "Test 1"), embedding=None, metadata={}
+                    ),
+                    Chunk(
+                        content="Test 2",
+                        chunk_id=generate_chunk_id("test-doc", "Test 2"),
+                        embedding="invalid_type",
+                        metadata={},
+                    ),
                 ]
             )
 
         # Verify Chunk raises ValueError for invalid embedding element type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
         with pytest.raises(ValueError, match=" Input should be a valid number, unable to parse string as a number "):
             await vector_store_with_index.insert_chunks(
-                Chunk(content="Test 1", embedding=[0.1, "string", 0.3], metadata={})
+                Chunk(
+                    content="Test 1",
+                    chunk_id=generate_chunk_id("test-doc", "Test 1"),
+                    embedding=[0.1, "string", 0.3],
+                    metadata={},
+                )
             )
 
         chunks_wrong_dim = [
-            Chunk(content="Test 1", embedding=[0.1, 0.2, 0.3, 0.4], metadata={}),
+            Chunk(
+                content="Test 1",
+                chunk_id=generate_chunk_id("test-doc", "Test 1"),
+                embedding=[0.1, 0.2, 0.3, 0.4],
+                metadata={},
+            ),
         ]
         with pytest.raises(ValueError, match="has dimension 4, expected 3"):
             await vector_store_with_index.insert_chunks(chunks_wrong_dim)
@@ -317,9 +352,14 @@ class TestVectorStoreWithIndex:
         )
 
         chunks = [
-            Chunk(content="Test 1", embedding=None, metadata={}),
-            Chunk(content="Test 2", embedding=[0.2, 0.2, 0.2], metadata={}),
-            Chunk(content="Test 3", embedding=None, metadata={}),
+            Chunk(content="Test 1", chunk_id=generate_chunk_id("test-doc", "Test 1"), embedding=None, metadata={}),
+            Chunk(
+                content="Test 2",
+                chunk_id=generate_chunk_id("test-doc", "Test 2"),
+                embedding=[0.2, 0.2, 0.2],
+                metadata={},
+            ),
+            Chunk(content="Test 3", chunk_id=generate_chunk_id("test-doc", "Test 3"), embedding=None, metadata={}),
         ]
 
         mock_inference_api.openai_embeddings.return_value.data = [