# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json

organization:
  # Name of your organization or company, used to determine the name of the client
  # and headings.
  name: llama-stack-client
  docs: https://llama-stack.readthedocs.io/en/latest/
  contact: llamastack@meta.com

security:
  - {}
  - BearerAuth: []

security_schemes:
  BearerAuth:
    type: http
    scheme: bearer

# `targets` define the output targets and their customization options, such as
# whether to emit the Node SDK and what its package name should be.
targets:
  node:
    package_name: llama-stack-client
    production_repo: llamastack/llama-stack-client-typescript
    publish:
      npm: false
  python:
    package_name: llama_stack_client
    production_repo: llamastack/llama-stack-client-python
    options:
      use_uv: true
    publish:
      pypi: true
    project_name: llama_stack_client
  kotlin:
    reverse_domain: com.llama_stack_client.api
    production_repo: null
    publish:
      maven: false
  go:
    package_name: llama-stack-client
    production_repo: llamastack/llama-stack-client-go
    options:
      enable_v2: true
      back_compat_use_shared_package: false

# `client_settings` define settings for the API client, such as extra constructor
# arguments (used for authentication), retry behavior, idempotency, etc.
client_settings:
  default_env_prefix: LLAMA_STACK_CLIENT
  opts:
    api_key:
      type: string
      read_env: LLAMA_STACK_CLIENT_API_KEY
      auth: { security_scheme: BearerAuth }
      nullable: true

# `environments` is a map from the name of the environment (e.g. "sandbox",
# "production") to the corresponding url to use.
environments:
  production: http://any-hosted-llama-stack.com

# `pagination` defines [pagination schemes], which provide a template to match
# endpoints and generate next-page and auto-pagination helpers in the SDKs.
pagination:
  - name: datasets_iterrows
    type: offset
    request:
      dataset_id:
        type: string
      start_index:
        type: integer
        x-stainless-pagination-property:
          purpose: offset_count_param
      limit:
        type: integer
    response:
      data:
        type: array
        items:
          type: object
      next_index:
        type: integer
        x-stainless-pagination-property:
          purpose: offset_count_start_field
  - name: openai_cursor_page
    type: cursor
    request:
      limit:
        type: integer
      after:
        type: string
        x-stainless-pagination-property:
          purpose: next_cursor_param
    response:
      data:
        type: array
        items: {}
      has_more:
        type: boolean
      last_id:
        type: string
        x-stainless-pagination-property:
          purpose: next_cursor_field
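# For orientation only (not part of the config schema): the `datasets_iterrows`
# scheme above is what drives the generated auto-pagination helpers. A Python
# sketch, assuming the generated client follows the usual Stainless shape
# (`LlamaStackClient` constructor, iterable paginated return values):
#
#   from llama_stack_client import LlamaStackClient
#
#   client = LlamaStackClient(base_url="http://localhost:8321")
#   # Iterating the result is assumed to follow `next_index` across pages
#   # automatically, yielding the items of each page's `data` array.
#   for row in client.datasets.iterrows(dataset_id="my-dataset", limit=100):
#       print(row)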
# `resources` define the structure and organization for your API, such as how
# methods and models are grouped together and accessed. See the [configuration
# guide] for more information.
#
# [configuration guide]:
# https://app.stainlessapi.com/docs/guides/configure#resources
resources:
  $shared:
    models:
      agent_config: AgentConfig
      interleaved_content_item: InterleavedContentItem
      interleaved_content: InterleavedContent
      batch_completion: BatchCompletionResponse
      param_type: ParamType
      safety_violation: SafetyViolation
      sampling_params: SamplingParams
      scoring_result: ScoringResult
      message: Message
      user_message: UserMessage
      completion_message: CompletionMessage
      tool_response_message: ToolResponseMessage
      system_message: SystemMessage
      tool_call: ToolCall
      metric: MetricInResponse
      tool_param_definition: ToolParamDefinition
      content_delta: ContentDelta
      query_result: RAGQueryResult
      document: RAGDocument
      query_config: RAGQueryConfig
      query_generator_config: RAGQueryGeneratorConfig
      response_format: ResponseFormat
      chat_completion_response: ChatCompletionResponse

  toolgroups:
    models:
      tool_group: ToolGroup
      list_tool_groups_response: ListToolGroupsResponse
    methods:
      register: post /v1/toolgroups
      get: get /v1/toolgroups/{toolgroup_id}
      list: get /v1/toolgroups
      unregister: delete /v1/toolgroups/{toolgroup_id}

  tools:
    models:
      tool: Tool
      list_tools_response: ListToolsResponse
    methods:
      get: get /v1/tools/{tool_name}
      list:
        endpoint: get /v1/tools
        paginated: false

  tool_runtime:
    models:
      tool_def: ToolDef
      tool_invocation_result: ToolInvocationResult
    methods:
      list_tools:
        endpoint: get /v1/tool-runtime/list-tools
        paginated: false
      invoke_tool: post /v1/tool-runtime/invoke
    subresources:
      rag_tool:
        methods:
          insert: post /v1/tool-runtime/rag-tool/insert
          query: post /v1/tool-runtime/rag-tool/query

  responses:
    models:
      response_object_stream: OpenAIResponseObjectStream
      response_object: OpenAIResponseObject
    methods:
      create:
        type: http
        endpoint: post /v1/openai/v1/responses
        streaming:
          stream_event_model: responses.response_object_stream
          param_discriminator: stream
      retrieve: get /v1/openai/v1/responses/{response_id}
      list:
        type: http
        endpoint: get /v1/openai/v1/responses
      delete:
        type: http
        endpoint: delete /v1/openai/v1/responses/{response_id}
    subresources:
      input_items:
        methods:
          list:
            type: http
            endpoint: get /v1/openai/v1/responses/{response_id}/input_items
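  # For orientation (not part of the config): `param_discriminator: stream`
  # above makes the generators split streaming and non-streaming overloads on
  # a `stream` parameter. A Python sketch, with argument names assumed from
  # the OpenAI-compatible endpoint rather than taken from this file:
  #
  #   stream = client.responses.create(
  #       model="llama3.2:3b",
  #       input="Hello!",
  #       stream=True,  # selects the streaming overload
  #   )
  #   for event in stream:  # events typed via OpenAIResponseObjectStream
  #       print(event)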
  agents:
    # Configure the methods defined in this resource. Each key in the object is the
    # name of the method and the value is either an endpoint (for example, `get /v1/foo`)
    # or an object with more detail.
    #
    # [reference]: https://app.stainlessapi.com/docs/reference/config#method
    methods:
      create: post /v1/agents
      list: get /v1/agents
      retrieve: get /v1/agents/{agent_id}
      delete: delete /v1/agents/{agent_id}
    models:
      inference_step: InferenceStep
      tool_execution_step: ToolExecutionStep
      tool_response: ToolResponse
      shield_call_step: ShieldCallStep
      memory_retrieval_step: MemoryRetrievalStep
    # Subresources define resources that are nested within another for more powerful
    # logical groupings, e.g. `cards.payments`.
    subresources:
      session:
        # Configure the models--named types--defined in the resource. Each key in the
        # object is the name of the model and the value is either the name of a schema in
        # `#/components/schemas` or an object with more detail.
        #
        # [reference]: https://app.stainlessapi.com/docs/reference/config#model
        models:
          session: Session
        methods:
          list: get /v1/agents/{agent_id}/sessions
          create: post /v1/agents/{agent_id}/session
          delete: delete /v1/agents/{agent_id}/session/{session_id}
          retrieve: get /v1/agents/{agent_id}/session/{session_id}
      steps:
        methods:
          retrieve: get /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}
      turn:
        models:
          turn: Turn
          turn_response_event: AgentTurnResponseEvent
          turn_response_event_payload: AgentTurnResponseEventPayload
          agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk
        methods:
          create:
            type: http
            endpoint: post /v1/agents/{agent_id}/session/{session_id}/turn
            streaming:
              stream_event_model: agents.turn.agent_turn_response_stream_chunk
              param_discriminator: stream
          retrieve: get /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}
          resume:
            type: http
            endpoint: post /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume
            streaming:
              stream_event_model: agents.turn.agent_turn_response_stream_chunk
              param_discriminator: stream

  datasets:
    models:
      list_datasets_response: ListDatasetsResponse
    methods:
      register: post /v1/datasets
      retrieve: get /v1/datasets/{dataset_id}
      list:
        endpoint: get /v1/datasets
        paginated: false
      unregister: delete /v1/datasets/{dataset_id}
      iterrows: get /v1/datasetio/iterrows/{dataset_id}
      appendrows: post /v1/datasetio/append-rows/{dataset_id}

  eval:
    methods:
      evaluate_rows: post /v1/eval/benchmarks/{benchmark_id}/evaluations
      run_eval: post /v1/eval/benchmarks/{benchmark_id}/jobs
      evaluate_rows_alpha: post /v1/eval/benchmarks/{benchmark_id}/evaluations
      run_eval_alpha: post /v1/eval/benchmarks/{benchmark_id}/jobs
    subresources:
      jobs:
        methods:
          cancel: delete /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}
          status: get /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}
          retrieve: get /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
    models:
      evaluate_response: EvaluateResponse
      benchmark_config: BenchmarkConfig
      eval_candidate: EvalCandidate
      job: Job

  inspect:
    models:
      healthInfo: HealthInfo
      providerInfo: ProviderInfo
      routeInfo: RouteInfo
      versionInfo: VersionInfo
    methods:
      health: get /v1/health
      version: get /v1/version
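  # A sketch of how the nested agents resources above compose at call time
  # (illustrative only; field names such as `agent_id` and `session_id` are
  # assumptions based on the path parameters, not guaranteed response shapes):
  #
  #   agent = client.agents.create(agent_config={...})
  #   session = client.agents.session.create(
  #       agent_id=agent.agent_id, session_name="demo"
  #   )
  #   turn = client.agents.turn.create(
  #       agent_id=agent.agent_id,
  #       session_id=session.session_id,
  #       messages=[{"role": "user", "content": "Hi"}],
  #   )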
  inference:
    models:
      completionResponse: CompletionResponse
      embeddingsResponse: EmbeddingsResponse
      token_log_probs: TokenLogProbs
      chat_completion_response_stream_chunk: ChatCompletionResponseStreamChunk
    methods:
      chat_completion:
        deprecated: "/v1/inference/chat-completion is deprecated. Please use /v1/openai/v1/chat/completions."
        type: http
        endpoint: post /v1/inference/chat-completion
        streaming:
          stream_event_model: inference.chat_completion_response_stream_chunk
          param_discriminator: stream
      completion:
        deprecated: "/v1/inference/completion is deprecated. Please use /v1/openai/v1/completions."
        type: http
        endpoint: post /v1/inference/completion
        streaming:
          param_discriminator: stream
      batch_completion: post /v1/inference/batch-completion
      batch_chat_completion: post /v1/inference/batch-chat-completion
      embeddings:
        deprecated: "/v1/inference/embeddings is deprecated. Please use /v1/openai/v1/embeddings."
        type: http
        endpoint: post /v1/inference/embeddings
      rerank: post /v1/inference/rerank

  embeddings:
    models:
      create_embeddings_response: OpenAIEmbeddingsResponse
    methods:
      create: post /v1/openai/v1/embeddings

  chat:
    models:
      chat_completion_chunk: OpenAIChatCompletionChunk
    subresources:
      completions:
        methods:
          create:
            type: http
            endpoint: post /v1/openai/v1/chat/completions
            streaming:
              stream_event_model: chat.chat_completion_chunk
              param_discriminator: stream
          list:
            type: http
            endpoint: get /v1/openai/v1/chat/completions
          retrieve:
            type: http
            endpoint: get /v1/openai/v1/chat/completions/{completion_id}

  completions:
    methods:
      create:
        type: http
        endpoint: post /v1/openai/v1/completions
        streaming:
          param_discriminator: stream

  vector_io:
    models:
      queryChunksResponse: QueryChunksResponse
    methods:
      insert: post /v1/vector-io/insert
      query: post /v1/vector-io/query

  vector_dbs:
    models:
      list_vector_dbs_response: ListVectorDBsResponse
    methods:
      retrieve: get /v1/vector-dbs/{vector_db_id}
      list:
        endpoint: get /v1/vector-dbs
        paginated: false
      register: post /v1/vector-dbs
      unregister: delete /v1/vector-dbs/{vector_db_id}

  vector_stores:
    models:
      vector_store: VectorStoreObject
      list_vector_stores_response: VectorStoreListResponse
      vector_store_delete_response: VectorStoreDeleteResponse
      vector_store_search_response: VectorStoreSearchResponsePage
    methods:
      create: post /v1/openai/v1/vector_stores
      list:
        endpoint: get /v1/openai/v1/vector_stores
      retrieve: get /v1/openai/v1/vector_stores/{vector_store_id}
      update: post /v1/openai/v1/vector_stores/{vector_store_id}
      delete: delete /v1/openai/v1/vector_stores/{vector_store_id}
      search: post /v1/openai/v1/vector_stores/{vector_store_id}/search
    subresources:
      files:
        models:
          vector_store_file: VectorStoreFileObject
        methods:
          list: get /v1/openai/v1/vector_stores/{vector_store_id}/files
          retrieve: get /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}
          update: post /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}
          delete: delete /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}
          create: post /v1/openai/v1/vector_stores/{vector_store_id}/files
          content: get /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content

  models:
    models:
      model: Model
      list_models_response: ListModelsResponse
    methods:
      retrieve: get /v1/models/{model_id}
      list:
        endpoint: get /v1/models
        paginated: false
      register: post /v1/models
      unregister: delete /v1/models/{model_id}
    subresources:
      openai:
        methods:
          list:
            endpoint: get /v1/openai/v1/models
            paginated: false

  post_training:
    models:
      algorithm_config: AlgorithmConfig
      post_training_job: PostTrainingJob
      list_post_training_jobs_response: ListPostTrainingJobsResponse
    methods:
      preference_optimize: post /v1/post-training/preference-optimize
      supervised_fine_tune: post /v1/post-training/supervised-fine-tune
    subresources:
      job:
        methods:
          artifacts: get /v1/post-training/job/artifacts
          cancel: post /v1/post-training/job/cancel
          status: get /v1/post-training/job/status
          list:
            endpoint: get /v1/post-training/jobs
            paginated: false

  providers:
    models:
      list_providers_response: ListProvidersResponse
    methods:
      list:
        endpoint: get /v1/providers
        paginated: false
      retrieve: get /v1/providers/{provider_id}

  routes:
    models:
      list_routes_response: ListRoutesResponse
    methods:
      list:
        endpoint: get /v1/inspect/routes
        paginated: false

  moderations:
    models:
      create_response: ModerationObject
    methods:
      create: post /v1/openai/v1/moderations

  safety:
    models:
      run_shield_response: RunShieldResponse
    methods:
      run_shield: post /v1/safety/run-shield
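  # Sketch of the safety check above in use (hedged: the `violation` and
  # `user_message` fields are assumptions based on the RunShieldResponse and
  # SafetyViolation model names, and the shield id is a placeholder):
  #
  #   result = client.safety.run_shield(
  #       shield_id="llama-guard",
  #       messages=[{"role": "user", "content": "..."}],
  #       params={},
  #   )
  #   if result.violation is not None:
  #       print(result.violation.user_message)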
  shields:
    models:
      shield: Shield
      list_shields_response: ListShieldsResponse
    methods:
      retrieve: get /v1/shields/{identifier}
      list:
        endpoint: get /v1/shields
        paginated: false
      register: post /v1/shields
      delete: delete /v1/shields/{identifier}

  synthetic_data_generation:
    models:
      syntheticDataGenerationResponse: SyntheticDataGenerationResponse
    methods:
      generate: post /v1/synthetic-data-generation/generate

  telemetry:
    models:
      span_with_status: SpanWithStatus
      trace: Trace
      query_spans_response: QuerySpansResponse
      event: Event
      query_condition: QueryCondition
    methods:
      query_traces:
        endpoint: post /v1/telemetry/traces
        skip_test_reason: "unsupported query params in java / kotlin"
      get_span_tree: post /v1/telemetry/spans/{span_id}/tree
      query_spans:
        endpoint: post /v1/telemetry/spans
        skip_test_reason: "unsupported query params in java / kotlin"
      query_metrics:
        endpoint: post /v1/telemetry/metrics/{metric_name}
        skip_test_reason: "unsupported query params in java / kotlin"
      log_event: post /v1/telemetry/events
      save_spans_to_dataset: post /v1/telemetry/spans/export
      get_span: get /v1/telemetry/traces/{trace_id}/spans/{span_id}
      get_trace: get /v1/telemetry/traces/{trace_id}

  scoring:
    methods:
      score: post /v1/scoring/score
      score_batch: post /v1/scoring/score-batch

  scoring_functions:
    methods:
      retrieve: get /v1/scoring-functions/{scoring_fn_id}
      list:
        endpoint: get /v1/scoring-functions
        paginated: false
      register: post /v1/scoring-functions
    models:
      scoring_fn: ScoringFn
      scoring_fn_params: ScoringFnParams
      list_scoring_functions_response: ListScoringFunctionsResponse

  benchmarks:
    methods:
      retrieve: get /v1/eval/benchmarks/{benchmark_id}
      list:
        endpoint: get /v1/eval/benchmarks
        paginated: false
      register: post /v1/eval/benchmarks
    models:
      benchmark: Benchmark
      list_benchmarks_response: ListBenchmarksResponse

  files:
    methods:
      create: post /v1/openai/v1/files
      list: get /v1/openai/v1/files
      retrieve: get /v1/openai/v1/files/{file_id}
      delete: delete /v1/openai/v1/files/{file_id}
      content: get /v1/openai/v1/files/{file_id}/content
    models:
      file: OpenAIFileObject
      list_files_response: ListOpenAIFileResponse
      delete_file_response: OpenAIFileDeleteResponse

settings:
  license: MIT
  unwrap_response_fields: [data]

openapi:
  transformations:
    - command: renameValue
      reason: pydantic reserved name
      args:
        filter:
          only:
            - "$.components.schemas.InferenceStep.properties.model_response"
        rename:
          python:
            property_name: "inference_model_response"
    # - command: renameValue
    #   reason: pydantic reserved name
    #   args:
    #     filter:
    #       only:
    #         - '$.components.schemas.Model.properties.model_type'
    #     rename:
    #       python:
    #         property_name: 'type'
    - command: mergeObject
      reason: Better return_type using enum
      args:
        target:
          - "$.components.schemas"
        object:
          ReturnType:
            additionalProperties: false
            properties:
              type:
                enum:
                  - string
                  - number
                  - boolean
                  - array
                  - object
                  - json
                  - union
                  - chat_completion_input
                  - completion_input
                  - agent_turn_input
            required:
              - type
            type: object
    - command: replaceProperties
      reason: Replace return type properties with better model (see above)
      args:
        filter:
          only:
            - "$.components.schemas.ScoringFn.properties.return_type"
            - "$.components.schemas.RegisterScoringFunctionRequest.properties.return_type"
        value:
          $ref: "#/components/schemas/ReturnType"
    - command: oneOfToAnyOf
      reason: >-
        Prism (mock server) doesn't like one of our requests as it technically
        matches multiple variants
    - reason: For better names
      command: extractToRefs
      args:
        ref:
          target: "$.components.schemas.ToolCallDelta.properties.tool_call"
          name: "#/components/schemas/ToolCallOrString"
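# Why the `renameValue` transformation above exists: pydantic v2 reserves the
# `model_` attribute namespace on BaseModel subclasses, so a `model_response`
# field would collide in the generated Python models. The expected effect
# (illustrative):
#
#   step.inference_model_response  # Python SDK attribute name
#   # the wire format and other SDKs keep `model_response` unchanged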
# `readme` is used to configure the code snippets that will be rendered in the
# README.md of various SDKs. In particular, you can change the `headline`
# snippet's endpoint and the arguments to call it with.
readme:
  example_requests:
    default:
      type: request
      endpoint: post /v1/inference/chat-completion
      params: &ref_0 {}
    headline:
      type: request
      endpoint: post /v1/models
      params: *ref_0
    pagination:
      type: request
      endpoint: post /v1/inference/chat-completion
      params: {}
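# Given the resource mapping above, the README headline snippet is generated
# from `post /v1/models`, i.e. the `models.register` method. Roughly (sketch
# only; the actual arguments are derived from the OpenAPI spec at generation
# time, and `model_id` here is an assumed parameter name):
#
#   model = client.models.register(model_id="my-model")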