mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-15 08:02:39 +00:00
Merge remote-tracking branch 'origin/main' into dependabot/uv/openai-2.5.0
This commit is contained in:
commit
13450c1a68
317 changed files with 86802 additions and 18957 deletions
14
docs/static/deprecated-llama-stack-spec.html
vendored
14
docs/static/deprecated-llama-stack-spec.html
vendored
|
|
@ -5547,7 +5547,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
@ -5798,7 +5798,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
@ -9024,6 +9024,10 @@
|
|||
"$ref": "#/components/schemas/OpenAIResponseUsage",
|
||||
"description": "(Optional) Token usage information for the response"
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"description": "(Optional) System message inserted into the model's context"
|
||||
},
|
||||
"input": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
|
@ -9901,6 +9905,10 @@
|
|||
"usage": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseUsage",
|
||||
"description": "(Optional) Token usage information for the response"
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"description": "(Optional) System message inserted into the model's context"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
@ -13459,7 +13467,7 @@
|
|||
},
|
||||
{
|
||||
"name": "Inference",
|
||||
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.",
|
||||
"x-displayName": "Inference"
|
||||
},
|
||||
{
|
||||
|
|
|
|||
19
docs/static/deprecated-llama-stack-spec.yaml
vendored
19
docs/static/deprecated-llama-stack-spec.yaml
vendored
|
|
@ -4114,7 +4114,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
@ -4303,7 +4303,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
@ -6734,6 +6734,10 @@ components:
|
|||
$ref: '#/components/schemas/OpenAIResponseUsage'
|
||||
description: >-
|
||||
(Optional) Token usage information for the response
|
||||
instructions:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) System message inserted into the model's context
|
||||
input:
|
||||
type: array
|
||||
items:
|
||||
|
|
@ -7403,6 +7407,10 @@ components:
|
|||
$ref: '#/components/schemas/OpenAIResponseUsage'
|
||||
description: >-
|
||||
(Optional) Token usage information for the response
|
||||
instructions:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) System message inserted into the model's context
|
||||
additionalProperties: false
|
||||
required:
|
||||
- created_at
|
||||
|
|
@ -10210,13 +10218,16 @@ tags:
|
|||
embeddings.
|
||||
|
||||
|
||||
This API provides the raw interface to the underlying models. Two kinds of models
|
||||
are supported:
|
||||
This API provides the raw interface to the underlying models. Three kinds of
|
||||
models are supported:
|
||||
|
||||
- LLM models: these models generate "raw" and "chat" (conversational) completions.
|
||||
|
||||
- Embedding models: these models generate embeddings to be used for semantic
|
||||
search.
|
||||
|
||||
- Rerank models: these models reorder the documents based on their relevance
|
||||
to a query.
|
||||
x-displayName: Inference
|
||||
- name: Models
|
||||
description: ''
|
||||
|
|
|
|||
|
|
@ -1850,7 +1850,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
@ -3983,7 +3983,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
|
|||
|
|
@ -1320,7 +1320,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
@ -2927,7 +2927,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
|
|||
21
docs/static/llama-stack-spec.html
vendored
21
docs/static/llama-stack-spec.html
vendored
|
|
@ -6767,7 +6767,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
@ -6826,7 +6826,8 @@
|
|||
"type": "string",
|
||||
"enum": [
|
||||
"llm",
|
||||
"embedding"
|
||||
"embedding",
|
||||
"rerank"
|
||||
],
|
||||
"title": "ModelType",
|
||||
"description": "Enumeration of supported model types in Llama Stack."
|
||||
|
|
@ -7567,6 +7568,10 @@
|
|||
"$ref": "#/components/schemas/OpenAIResponseUsage",
|
||||
"description": "(Optional) Token usage information for the response"
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"description": "(Optional) System message inserted into the model's context"
|
||||
},
|
||||
"input": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
|
@ -8115,6 +8120,10 @@
|
|||
"usage": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseUsage",
|
||||
"description": "(Optional) Token usage information for the response"
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"description": "(Optional) System message inserted into the model's context"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
@ -10164,7 +10173,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
@ -10646,7 +10655,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
@ -11699,7 +11708,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
@ -13228,7 +13237,7 @@
|
|||
},
|
||||
{
|
||||
"name": "Inference",
|
||||
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.",
|
||||
"x-displayName": "Inference"
|
||||
},
|
||||
{
|
||||
|
|
|
|||
24
docs/static/llama-stack-spec.yaml
vendored
24
docs/static/llama-stack-spec.yaml
vendored
|
|
@ -5127,7 +5127,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
@ -5169,6 +5169,7 @@ components:
|
|||
enum:
|
||||
- llm
|
||||
- embedding
|
||||
- rerank
|
||||
title: ModelType
|
||||
description: >-
|
||||
Enumeration of supported model types in Llama Stack.
|
||||
|
|
@ -5715,6 +5716,10 @@ components:
|
|||
$ref: '#/components/schemas/OpenAIResponseUsage'
|
||||
description: >-
|
||||
(Optional) Token usage information for the response
|
||||
instructions:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) System message inserted into the model's context
|
||||
input:
|
||||
type: array
|
||||
items:
|
||||
|
|
@ -6118,6 +6123,10 @@ components:
|
|||
$ref: '#/components/schemas/OpenAIResponseUsage'
|
||||
description: >-
|
||||
(Optional) Token usage information for the response
|
||||
instructions:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) System message inserted into the model's context
|
||||
additionalProperties: false
|
||||
required:
|
||||
- created_at
|
||||
|
|
@ -7811,7 +7820,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
@ -8119,7 +8128,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
@ -8882,7 +8891,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
@ -10082,13 +10091,16 @@ tags:
|
|||
embeddings.
|
||||
|
||||
|
||||
This API provides the raw interface to the underlying models. Two kinds of models
|
||||
are supported:
|
||||
This API provides the raw interface to the underlying models. Three kinds of
|
||||
models are supported:
|
||||
|
||||
- LLM models: these models generate "raw" and "chat" (conversational) completions.
|
||||
|
||||
- Embedding models: these models generate embeddings to be used for semantic
|
||||
search.
|
||||
|
||||
- Rerank models: these models reorder the documents based on their relevance
|
||||
to a query.
|
||||
x-displayName: Inference
|
||||
- name: Inspect
|
||||
description: >-
|
||||
|
|
|
|||
25
docs/static/stainless-llama-stack-spec.html
vendored
25
docs/static/stainless-llama-stack-spec.html
vendored
|
|
@ -8439,7 +8439,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
@ -8498,7 +8498,8 @@
|
|||
"type": "string",
|
||||
"enum": [
|
||||
"llm",
|
||||
"embedding"
|
||||
"embedding",
|
||||
"rerank"
|
||||
],
|
||||
"title": "ModelType",
|
||||
"description": "Enumeration of supported model types in Llama Stack."
|
||||
|
|
@ -9239,6 +9240,10 @@
|
|||
"$ref": "#/components/schemas/OpenAIResponseUsage",
|
||||
"description": "(Optional) Token usage information for the response"
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"description": "(Optional) System message inserted into the model's context"
|
||||
},
|
||||
"input": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
|
@ -9787,6 +9792,10 @@
|
|||
"usage": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseUsage",
|
||||
"description": "(Optional) Token usage information for the response"
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"description": "(Optional) System message inserted into the model's context"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
@ -11836,7 +11845,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
@ -12318,7 +12327,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
@ -13371,7 +13380,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
@ -14918,7 +14927,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
@ -16663,7 +16672,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
@ -17918,7 +17927,7 @@
|
|||
},
|
||||
{
|
||||
"name": "Inference",
|
||||
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.",
|
||||
"x-displayName": "Inference"
|
||||
},
|
||||
{
|
||||
|
|
|
|||
28
docs/static/stainless-llama-stack-spec.yaml
vendored
28
docs/static/stainless-llama-stack-spec.yaml
vendored
|
|
@ -6340,7 +6340,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
@ -6382,6 +6382,7 @@ components:
|
|||
enum:
|
||||
- llm
|
||||
- embedding
|
||||
- rerank
|
||||
title: ModelType
|
||||
description: >-
|
||||
Enumeration of supported model types in Llama Stack.
|
||||
|
|
@ -6928,6 +6929,10 @@ components:
|
|||
$ref: '#/components/schemas/OpenAIResponseUsage'
|
||||
description: >-
|
||||
(Optional) Token usage information for the response
|
||||
instructions:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) System message inserted into the model's context
|
||||
input:
|
||||
type: array
|
||||
items:
|
||||
|
|
@ -7331,6 +7336,10 @@ components:
|
|||
$ref: '#/components/schemas/OpenAIResponseUsage'
|
||||
description: >-
|
||||
(Optional) Token usage information for the response
|
||||
instructions:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) System message inserted into the model's context
|
||||
additionalProperties: false
|
||||
required:
|
||||
- created_at
|
||||
|
|
@ -9024,7 +9033,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
@ -9332,7 +9341,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
@ -10095,7 +10104,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
@ -11217,7 +11226,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
@ -12544,7 +12553,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
@ -13477,13 +13486,16 @@ tags:
|
|||
embeddings.
|
||||
|
||||
|
||||
This API provides the raw interface to the underlying models. Two kinds of models
|
||||
are supported:
|
||||
This API provides the raw interface to the underlying models. Three kinds of
|
||||
models are supported:
|
||||
|
||||
- LLM models: these models generate "raw" and "chat" (conversational) completions.
|
||||
|
||||
- Embedding models: these models generate embeddings to be used for semantic
|
||||
search.
|
||||
|
||||
- Rerank models: these models reorder the documents based on their relevance
|
||||
to a query.
|
||||
x-displayName: Inference
|
||||
- name: Inspect
|
||||
description: >-
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue