# mirror of https://github.com/meta-llama/llama-stack.git
# synced 2025-10-03 19:57:35 +00:00
# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json

organization:
  # Name of your organization or company, used to determine the name of the client
  # and headings.
  name: llama-stack-client
  docs: https://llama-stack.readthedocs.io/en/latest/
  contact: llamastack@meta.com

# Default security applied to every endpoint: either no auth ({}) or bearer auth.
security:
  - {}
  - BearerAuth: []

# Declares the auth schemes referenced by `security` above and `client_settings.opts.api_key.auth`.
security_schemes:
  BearerAuth:
    type: http
    scheme: bearer

# `targets` define the output targets and their customization options, such as
# whether to emit the Node SDK and what it's package name should be.
targets:
  node:
    package_name: llama-stack-client
    production_repo: llamastack/llama-stack-client-typescript
    publish:
      npm: false
  python:
    package_name: llama_stack_client
    production_repo: llamastack/llama-stack-client-python
    options:
      use_uv: true
    publish:
      pypi: true
    project_name: llama_stack_client
  kotlin:
    reverse_domain: com.llama_stack_client.api
    production_repo: null
    publish:
      maven: false
  go:
    package_name: llama-stack-client
    production_repo: llamastack/llama-stack-client-go
    options:
      enable_v2: true
      back_compat_use_shared_package: false

# `client_settings` define settings for the API client, such as extra constructor
# arguments (used for authentication), retry behavior, idempotency, etc.
client_settings:
  default_env_prefix: LLAMA_STACK_CLIENT
  opts:
    api_key:
      type: string
      read_env: LLAMA_STACK_CLIENT_API_KEY
      auth: { security_scheme: BearerAuth }
      nullable: true

# `environments` are a map of the name of the environment (e.g. "sandbox",
# "production") to the corresponding url to use.
environments:
  production: http://any-hosted-llama-stack.com

# `pagination` defines [pagination schemes] which provides a template to match
# endpoints and generate next-page and auto-pagination helpers in the SDKs.
pagination:
  - name: datasets_iterrows
    type: offset
    request:
      dataset_id:
        type: string
      start_index:
        type: integer
        x-stainless-pagination-property:
          purpose: offset_count_param
      limit:
        type: integer
    response:
      data:
        type: array
        items:
          type: object
      next_index:
        type: integer
        x-stainless-pagination-property:
          purpose: offset_count_start_field
  - name: openai_cursor_page
    type: cursor
    request:
      limit:
        type: integer
      after:
        type: string
        x-stainless-pagination-property:
          purpose: next_cursor_param
    response:
      data:
        type: array
        items: {}
      has_more:
        type: boolean
      last_id:
        type: string
        x-stainless-pagination-property:
          purpose: next_cursor_field

# `resources` define the structure and organization for your API, such as how
# methods and models are grouped together and accessed. See the [configuration
# guide] for more information.
#
# [configuration guide]:
# https://app.stainlessapi.com/docs/guides/configure#resources
resources:
  $shared:
    models:
      agent_config: AgentConfig
      interleaved_content_item: InterleavedContentItem
      interleaved_content: InterleavedContent
      batch_completion: BatchCompletionResponse
      param_type: ParamType
      safety_violation: SafetyViolation
      sampling_params: SamplingParams
      scoring_result: ScoringResult
      message: Message
      user_message: UserMessage
      completion_message: CompletionMessage
      tool_response_message: ToolResponseMessage
      system_message: SystemMessage
      tool_call: ToolCall
      metric: MetricInResponse
      tool_param_definition: ToolParamDefinition
      content_delta: ContentDelta
      query_result: RAGQueryResult
      document: RAGDocument
      query_config: RAGQueryConfig
      query_generator_config: RAGQueryGeneratorConfig
      response_format: ResponseFormat
      chat_completion_response: ChatCompletionResponse

  toolgroups:
    models:
      tool_group: ToolGroup
      list_tool_groups_response: ListToolGroupsResponse
    methods:
      register: post /v1/toolgroups
      get: get /v1/toolgroups/{toolgroup_id}
      list: get /v1/toolgroups
      unregister: delete /v1/toolgroups/{toolgroup_id}

  tools:
    models:
      tool: Tool
      list_tools_response: ListToolsResponse
    methods:
      get: get /v1/tools/{tool_name}
      list:
        endpoint: get /v1/tools
        paginated: false

  tool_runtime:
    models:
      tool_def: ToolDef
      tool_invocation_result: ToolInvocationResult
    methods:
      list_tools:
        endpoint: get /v1/tool-runtime/list-tools
        paginated: false
      invoke_tool: post /v1/tool-runtime/invoke
    subresources:
      rag_tool:
        methods:
          insert: post /v1/tool-runtime/rag-tool/insert
          query: post /v1/tool-runtime/rag-tool/query

  responses:
    models:
      response_object_stream: OpenAIResponseObjectStream
      response_object: OpenAIResponseObject
    methods:
      create:
        type: http
        endpoint: post /v1/openai/v1/responses
        streaming:
          stream_event_model: responses.response_object_stream
          param_discriminator: stream
      retrieve: get /v1/openai/v1/responses/{response_id}
      list:
        type: http
        endpoint: get /v1/openai/v1/responses
      delete:
        type: http
        endpoint: delete /v1/openai/v1/responses/{response_id}
    subresources:
      input_items:
        methods:
          list:
            type: http
            endpoint: get /v1/openai/v1/responses/{response_id}/input_items

  agents:
    # Configure the methods defined in this resource. Each key in the object is the
    # name of the method and the value is either an endpoint (for example, `get /v1/foo`)
    # or an object with more detail.
    #
    # [reference]: https://app.stainlessapi.com/docs/reference/config#method
    methods:
      create: post /v1/agents
      list: get /v1/agents
      retrieve: get /v1/agents/{agent_id}
      delete: delete /v1/agents/{agent_id}
    models:
      inference_step: InferenceStep
      tool_execution_step: ToolExecutionStep
      tool_response: ToolResponse
      shield_call_step: ShieldCallStep
      memory_retrieval_step: MemoryRetrievalStep
    # Subresources define resources that are nested within another for more powerful
    # logical groupings, e.g. `cards.payments`.
    subresources:
      session:
        # Configure the models--named types--defined in the resource. Each key in the
        # object is the name of the model and the value is either the name of a schema in
        # `#/components/schemas` or an object with more detail.
        #
        # [reference]: https://app.stainlessapi.com/docs/reference/config#model
        models:
          session: Session
        methods:
          list: get /v1/agents/{agent_id}/sessions
          create: post /v1/agents/{agent_id}/session
          delete: delete /v1/agents/{agent_id}/session/{session_id}
          retrieve: get /v1/agents/{agent_id}/session/{session_id}
      steps:
        methods:
          retrieve: get /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}
      turn:
        models:
          turn: Turn
          turn_response_event: AgentTurnResponseEvent
          turn_response_event_payload: AgentTurnResponseEventPayload
          agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk
        methods:
          create:
            type: http
            endpoint: post /v1/agents/{agent_id}/session/{session_id}/turn
            streaming:
              stream_event_model: agents.turn.agent_turn_response_stream_chunk
              param_discriminator: stream
          retrieve: get /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}
          resume:
            type: http
            endpoint: post /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume
            streaming:
              stream_event_model: agents.turn.agent_turn_response_stream_chunk
              param_discriminator: stream

  datasets:
    models:
      list_datasets_response: ListDatasetsResponse
    methods:
      register: post /v1/datasets
      retrieve: get /v1/datasets/{dataset_id}
      list:
        endpoint: get /v1/datasets
        paginated: false
      unregister: delete /v1/datasets/{dataset_id}
      iterrows: get /v1/datasetio/iterrows/{dataset_id}
      appendrows: post /v1/datasetio/append-rows/{dataset_id}

  eval:
    methods:
      evaluate_rows: post /v1/eval/benchmarks/{benchmark_id}/evaluations
      run_eval: post /v1/eval/benchmarks/{benchmark_id}/jobs
      evaluate_rows_alpha: post /v1/eval/benchmarks/{benchmark_id}/evaluations
      run_eval_alpha: post /v1/eval/benchmarks/{benchmark_id}/jobs

    subresources:
      jobs:
        methods:
          cancel: delete /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}
          status: get /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}
          retrieve: get /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
    models:
      evaluate_response: EvaluateResponse
      benchmark_config: BenchmarkConfig
      eval_candidate: EvalCandidate
      job: Job

  inspect:
    models:
      healthInfo: HealthInfo
      providerInfo: ProviderInfo
      routeInfo: RouteInfo
      versionInfo: VersionInfo
    methods:
      health: get /v1/health
      version: get /v1/version

  inference:
    models:
      completionResponse: CompletionResponse
      embeddingsResponse: EmbeddingsResponse
      token_log_probs: TokenLogProbs
      chat_completion_response_stream_chunk: ChatCompletionResponseStreamChunk
    methods:
      chat_completion:
        deprecated: "/v1/inference/chat-completion is deprecated. Please use /v1/openai/v1/chat/completions."
        type: http
        endpoint: post /v1/inference/chat-completion
        streaming:
          stream_event_model: inference.chat_completion_response_stream_chunk
          param_discriminator: stream
      completion:
        deprecated: "/v1/inference/completion is deprecated. Please use /v1/openai/v1/completions."
        type: http
        endpoint: post /v1/inference/completion
        streaming:
          param_discriminator: stream
      batch_completion: post /v1/inference/batch-completion
      batch_chat_completion: post /v1/inference/batch-chat-completion
      embeddings:
        deprecated: "/v1/inference/embeddings is deprecated. Please use /v1/openai/v1/embeddings."
        type: http
        endpoint: post /v1/inference/embeddings
      rerank: post /v1/inference/rerank

  embeddings:
    models:
      create_embeddings_response: OpenAIEmbeddingsResponse
    methods:
      create: post /v1/openai/v1/embeddings

  chat:
    models:
      chat_completion_chunk: OpenAIChatCompletionChunk
    subresources:
      completions:
        methods:
          create:
            type: http
            endpoint: post /v1/openai/v1/chat/completions
            streaming:
              stream_event_model: chat.chat_completion_chunk
              param_discriminator: stream
          list:
            type: http
            endpoint: get /v1/openai/v1/chat/completions
          retrieve:
            type: http
            endpoint: get /v1/openai/v1/chat/completions/{completion_id}

  completions:
    methods:
      create:
        type: http
        endpoint: post /v1/openai/v1/completions
        streaming:
          param_discriminator: stream

  vector_io:
    models:
      queryChunksResponse: QueryChunksResponse
    methods:
      insert: post /v1/vector-io/insert
      query: post /v1/vector-io/query

  vector_dbs:
    models:
      list_vector_dbs_response: ListVectorDBsResponse
    methods:
      retrieve: get /v1/vector-dbs/{vector_db_id}
      list:
        endpoint: get /v1/vector-dbs
        paginated: false
      register: post /v1/vector-dbs
      unregister: delete /v1/vector-dbs/{vector_db_id}

  vector_stores:
    models:
      vector_store: VectorStoreObject
      list_vector_stores_response: VectorStoreListResponse
      vector_store_delete_response: VectorStoreDeleteResponse
      vector_store_search_response: VectorStoreSearchResponsePage
    methods:
      create: post /v1/openai/v1/vector_stores
      list:
        endpoint: get /v1/openai/v1/vector_stores
      retrieve: get /v1/openai/v1/vector_stores/{vector_store_id}
      update: post /v1/openai/v1/vector_stores/{vector_store_id}
      delete: delete /v1/openai/v1/vector_stores/{vector_store_id}
      search: post /v1/openai/v1/vector_stores/{vector_store_id}/search
    subresources:
      files:
        models:
          vector_store_file: VectorStoreFileObject
        methods:
          list: get /v1/openai/v1/vector_stores/{vector_store_id}/files
          retrieve: get /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}
          update: post /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}
          delete: delete /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}
          create: post /v1/openai/v1/vector_stores/{vector_store_id}/files
          content: get /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content

  models:
    models:
      model: Model
      list_models_response: ListModelsResponse
    methods:
      retrieve: get /v1/models/{model_id}
      list:
        endpoint: get /v1/models
        paginated: false
      register: post /v1/models
      unregister: delete /v1/models/{model_id}
    subresources:
      openai:
        methods:
          list:
            endpoint: get /v1/openai/v1/models
            paginated: false

  post_training:
    models:
      algorithm_config: AlgorithmConfig
      post_training_job: PostTrainingJob
      list_post_training_jobs_response: ListPostTrainingJobsResponse
    methods:
      preference_optimize: post /v1/post-training/preference-optimize
      supervised_fine_tune: post /v1/post-training/supervised-fine-tune
    subresources:
      job:
        methods:
          artifacts: get /v1/post-training/job/artifacts
          cancel: post /v1/post-training/job/cancel
          status: get /v1/post-training/job/status
          list:
            endpoint: get /v1/post-training/jobs
            paginated: false

  providers:
    models:
      list_providers_response: ListProvidersResponse
    methods:
      list:
        endpoint: get /v1/providers
        paginated: false
      retrieve: get /v1/providers/{provider_id}

  routes:
    models:
      list_routes_response: ListRoutesResponse
    methods:
      list:
        endpoint: get /v1/inspect/routes
        paginated: false

  moderations:
    models:
      create_response: ModerationObject
    methods:
      create: post /v1/openai/v1/moderations

  safety:
    models:
      run_shield_response: RunShieldResponse
    methods:
      run_shield: post /v1/safety/run-shield

  shields:
    models:
      shield: Shield
      list_shields_response: ListShieldsResponse
    methods:
      retrieve: get /v1/shields/{identifier}
      list:
        endpoint: get /v1/shields
        paginated: false
      register: post /v1/shields
      delete: delete /v1/shields/{identifier}

  synthetic_data_generation:
    models:
      syntheticDataGenerationResponse: SyntheticDataGenerationResponse
    methods:
      generate: post /v1/synthetic-data-generation/generate

  telemetry:
    models:
      span_with_status: SpanWithStatus
      trace: Trace
      query_spans_response: QuerySpansResponse
      event: Event
      query_condition: QueryCondition
    methods:
      query_traces:
        endpoint: post /v1/telemetry/traces
        skip_test_reason: "unsupported query params in java / kotlin"
      get_span_tree: post /v1/telemetry/spans/{span_id}/tree
      query_spans:
        endpoint: post /v1/telemetry/spans
        skip_test_reason: "unsupported query params in java / kotlin"
      query_metrics:
        endpoint: post /v1/telemetry/metrics/{metric_name}
        skip_test_reason: "unsupported query params in java / kotlin"
      log_event: post /v1/telemetry/events
      save_spans_to_dataset: post /v1/telemetry/spans/export
      get_span: get /v1/telemetry/traces/{trace_id}/spans/{span_id}
      get_trace: get /v1/telemetry/traces/{trace_id}

  scoring:
    methods:
      score: post /v1/scoring/score
      score_batch: post /v1/scoring/score-batch

  scoring_functions:
    methods:
      retrieve: get /v1/scoring-functions/{scoring_fn_id}
      list:
        endpoint: get /v1/scoring-functions
        paginated: false
      register: post /v1/scoring-functions
    models:
      scoring_fn: ScoringFn
      scoring_fn_params: ScoringFnParams
      list_scoring_functions_response: ListScoringFunctionsResponse

  benchmarks:
    methods:
      retrieve: get /v1/eval/benchmarks/{benchmark_id}
      list:
        endpoint: get /v1/eval/benchmarks
        paginated: false
      register: post /v1/eval/benchmarks
    models:
      benchmark: Benchmark
      list_benchmarks_response: ListBenchmarksResponse

  files:
    methods:
      create: post /v1/openai/v1/files
      list: get /v1/openai/v1/files
      retrieve: get /v1/openai/v1/files/{file_id}
      delete: delete /v1/openai/v1/files/{file_id}
      content: get /v1/openai/v1/files/{file_id}/content
    models:
      file: OpenAIFileObject
      list_files_response: ListOpenAIFileResponse
      delete_file_response: OpenAIFileDeleteResponse

settings:
  license: MIT
  unwrap_response_fields: [data]

# `openapi.transformations` patch the upstream OpenAPI spec before SDK generation.
openapi:
  transformations:
    - command: renameValue
      reason: pydantic reserved name
      args:
        filter:
          only:
            - "$.components.schemas.InferenceStep.properties.model_response"
        rename:
          python:
            property_name: "inference_model_response"

    # - command: renameValue
    #   reason: pydantic reserved name
    #   args:
    #     filter:
    #       only:
    #         - '$.components.schemas.Model.properties.model_type'
    #     rename:
    #       python:
    #         property_name: 'type'
    - command: mergeObject
      reason: Better return_type using enum
      args:
        target:
          - "$.components.schemas"
        object:
          ReturnType:
            additionalProperties: false
            properties:
              type:
                enum:
                  - string
                  - number
                  - boolean
                  - array
                  - object
                  - json
                  - union
                  - chat_completion_input
                  - completion_input
                  - agent_turn_input
            required:
              - type
            type: object
    - command: replaceProperties
      reason: Replace return type properties with better model (see above)
      args:
        filter:
          only:
            - "$.components.schemas.ScoringFn.properties.return_type"
            - "$.components.schemas.RegisterScoringFunctionRequest.properties.return_type"
        value:
          $ref: "#/components/schemas/ReturnType"
    - command: oneOfToAnyOf
      reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants
    - reason: For better names
      command: extractToRefs
      args:
        ref:
          target: "$.components.schemas.ToolCallDelta.properties.tool_call"
          name: "#/components/schemas/ToolCallOrString"

# `readme` is used to configure the code snippets that will be rendered in the
# README.md of various SDKs. In particular, you can change the `headline`
# snippet's endpoint and the arguments to call it with.
readme:
  example_requests:
    default:
      type: request
      endpoint: post /v1/inference/chat-completion
      params: &ref_0 {}
    headline:
      type: request
      endpoint: post /v1/models
      params: *ref_0
    pagination:
      type: request
      endpoint: post /v1/inference/chat-completion
      params: {}