Merge remote-tracking branch 'origin/main' into dependabot/uv/openai-2.5.0

2025-12-15 08:02:39 +00:00 · 2025-10-22 12:17:03 -07:00 · 2025-10-22 12:17:03 -07:00 · 13450c1a68
commit 13450c1a68
parent 090fa7007e bb1ebb3c6b
317 changed files with 86802 additions and 18957 deletions
--- a/docs/static/deprecated-llama-stack-spec.html
+++ b/docs/static/deprecated-llama-stack-spec.html
@ -5547,7 +5547,7 @@
                        "enum": [
                            "model",
                            "shield",
-                            "vector_db",
+                            "vector_store",
                            "dataset",
                            "scoring_function",
                            "benchmark",
@ -5798,7 +5798,7 @@
                        "enum": [
                            "model",
                            "shield",
-                            "vector_db",
+                            "vector_store",
                            "dataset",
                            "scoring_function",
                            "benchmark",
@ -9024,6 +9024,10 @@
                        "$ref": "#/components/schemas/OpenAIResponseUsage",
                        "description": "(Optional) Token usage information for the response"
                    },
+                    "instructions": {
+                        "type": "string",
+                        "description": "(Optional) System message inserted into the model's context"
+                    },
                    "input": {
                        "type": "array",
                        "items": {
@ -9901,6 +9905,10 @@
                    "usage": {
                        "$ref": "#/components/schemas/OpenAIResponseUsage",
                        "description": "(Optional) Token usage information for the response"
+                    },
+                    "instructions": {
+                        "type": "string",
+                        "description": "(Optional) System message inserted into the model's context"
                    }
                },
                "additionalProperties": false,
@ -13459,7 +13467,7 @@
        },
        {
            "name": "Inference",
-            "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+            "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.",
            "x-displayName": "Inference"
        },
        {
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@ -4114,7 +4114,7 @@ components:
          enum:
            - model
            - shield
-            - vector_db
+            - vector_store
            - dataset
            - scoring_function
            - benchmark
@ -4303,7 +4303,7 @@ components:
          enum:
            - model
            - shield
-            - vector_db
+            - vector_store
            - dataset
            - scoring_function
            - benchmark
@ -6734,6 +6734,10 @@ components:
          $ref: '#/components/schemas/OpenAIResponseUsage'
          description: >-
            (Optional) Token usage information for the response
+        instructions:
+          type: string
+          description: >-
+            (Optional) System message inserted into the model's context
        input:
          type: array
          items:
@ -7403,6 +7407,10 @@ components:
          $ref: '#/components/schemas/OpenAIResponseUsage'
          description: >-
            (Optional) Token usage information for the response
+        instructions:
+          type: string
+          description: >-
+            (Optional) System message inserted into the model's context
      additionalProperties: false
      required:
        - created_at
@ -10210,13 +10218,16 @@ tags:
      embeddings.


-      This API provides the raw interface to the underlying models. Two kinds of models
-      are supported:
+      This API provides the raw interface to the underlying models. Three kinds of
+      models are supported:

      - LLM models: these models generate "raw" and "chat" (conversational) completions.

      - Embedding models: these models generate embeddings to be used for semantic
      search.
+
+      - Rerank models: these models reorder the documents based on their relevance
+      to a query.
    x-displayName: Inference
  - name: Models
    description: ''
--- a/docs/static/experimental-llama-stack-spec.html
+++ b/docs/static/experimental-llama-stack-spec.html
@ -1850,7 +1850,7 @@
                        "enum": [
                            "model",
                            "shield",
-                            "vector_db",
+                            "vector_store",
                            "dataset",
                            "scoring_function",
                            "benchmark",
@ -3983,7 +3983,7 @@
                        "enum": [
                            "model",
                            "shield",
-                            "vector_db",
+                            "vector_store",
                            "dataset",
                            "scoring_function",
                            "benchmark",
--- a/docs/static/experimental-llama-stack-spec.yaml
+++ b/docs/static/experimental-llama-stack-spec.yaml
@ -1320,7 +1320,7 @@ components:
          enum:
            - model
            - shield
-            - vector_db
+            - vector_store
            - dataset
            - scoring_function
            - benchmark
@ -2927,7 +2927,7 @@ components:
          enum:
            - model
            - shield
-            - vector_db
+            - vector_store
            - dataset
            - scoring_function
            - benchmark
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@ -6767,7 +6767,7 @@
                        "enum": [
                            "model",
                            "shield",
-                            "vector_db",
+                            "vector_store",
                            "dataset",
                            "scoring_function",
                            "benchmark",
@ -6826,7 +6826,8 @@
                "type": "string",
                "enum": [
                    "llm",
-                    "embedding"
+                    "embedding",
+                    "rerank"
                ],
                "title": "ModelType",
                "description": "Enumeration of supported model types in Llama Stack."
@ -7567,6 +7568,10 @@
                        "$ref": "#/components/schemas/OpenAIResponseUsage",
                        "description": "(Optional) Token usage information for the response"
                    },
+                    "instructions": {
+                        "type": "string",
+                        "description": "(Optional) System message inserted into the model's context"
+                    },
                    "input": {
                        "type": "array",
                        "items": {
@ -8115,6 +8120,10 @@
                    "usage": {
                        "$ref": "#/components/schemas/OpenAIResponseUsage",
                        "description": "(Optional) Token usage information for the response"
+                    },
+                    "instructions": {
+                        "type": "string",
+                        "description": "(Optional) System message inserted into the model's context"
                    }
                },
                "additionalProperties": false,
@ -10164,7 +10173,7 @@
                        "enum": [
                            "model",
                            "shield",
-                            "vector_db",
+                            "vector_store",
                            "dataset",
                            "scoring_function",
                            "benchmark",
@ -10646,7 +10655,7 @@
                        "enum": [
                            "model",
                            "shield",
-                            "vector_db",
+                            "vector_store",
                            "dataset",
                            "scoring_function",
                            "benchmark",
@ -11699,7 +11708,7 @@
                        "enum": [
                            "model",
                            "shield",
-                            "vector_db",
+                            "vector_store",
                            "dataset",
                            "scoring_function",
                            "benchmark",
@ -13228,7 +13237,7 @@
        },
        {
            "name": "Inference",
-            "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+            "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.",
            "x-displayName": "Inference"
        },
        {
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@ -5127,7 +5127,7 @@ components:
          enum:
            - model
            - shield
-            - vector_db
+            - vector_store
            - dataset
            - scoring_function
            - benchmark
@ -5169,6 +5169,7 @@ components:
      enum:
        - llm
        - embedding
+        - rerank
      title: ModelType
      description: >-
        Enumeration of supported model types in Llama Stack.
@ -5715,6 +5716,10 @@ components:
          $ref: '#/components/schemas/OpenAIResponseUsage'
          description: >-
            (Optional) Token usage information for the response
+        instructions:
+          type: string
+          description: >-
+            (Optional) System message inserted into the model's context
        input:
          type: array
          items:
@ -6118,6 +6123,10 @@ components:
          $ref: '#/components/schemas/OpenAIResponseUsage'
          description: >-
            (Optional) Token usage information for the response
+        instructions:
+          type: string
+          description: >-
+            (Optional) System message inserted into the model's context
      additionalProperties: false
      required:
        - created_at
@ -7811,7 +7820,7 @@ components:
          enum:
            - model
            - shield
-            - vector_db
+            - vector_store
            - dataset
            - scoring_function
            - benchmark
@ -8119,7 +8128,7 @@ components:
          enum:
            - model
            - shield
-            - vector_db
+            - vector_store
            - dataset
            - scoring_function
            - benchmark
@ -8882,7 +8891,7 @@ components:
          enum:
            - model
            - shield
-            - vector_db
+            - vector_store
            - dataset
            - scoring_function
            - benchmark
@ -10082,13 +10091,16 @@ tags:
      embeddings.


-      This API provides the raw interface to the underlying models. Two kinds of models
-      are supported:
+      This API provides the raw interface to the underlying models. Three kinds of
+      models are supported:

      - LLM models: these models generate "raw" and "chat" (conversational) completions.

      - Embedding models: these models generate embeddings to be used for semantic
      search.
+
+      - Rerank models: these models reorder the documents based on their relevance
+      to a query.
    x-displayName: Inference
  - name: Inspect
    description: >-
--- a/docs/static/stainless-llama-stack-spec.html
+++ b/docs/static/stainless-llama-stack-spec.html
@ -8439,7 +8439,7 @@
                        "enum": [
                            "model",
                            "shield",
-                            "vector_db",
+                            "vector_store",
                            "dataset",
                            "scoring_function",
                            "benchmark",
@ -8498,7 +8498,8 @@
                "type": "string",
                "enum": [
                    "llm",
-                    "embedding"
+                    "embedding",
+                    "rerank"
                ],
                "title": "ModelType",
                "description": "Enumeration of supported model types in Llama Stack."
@ -9239,6 +9240,10 @@
                        "$ref": "#/components/schemas/OpenAIResponseUsage",
                        "description": "(Optional) Token usage information for the response"
                    },
+                    "instructions": {
+                        "type": "string",
+                        "description": "(Optional) System message inserted into the model's context"
+                    },
                    "input": {
                        "type": "array",
                        "items": {
@ -9787,6 +9792,10 @@
                    "usage": {
                        "$ref": "#/components/schemas/OpenAIResponseUsage",
                        "description": "(Optional) Token usage information for the response"
+                    },
+                    "instructions": {
+                        "type": "string",
+                        "description": "(Optional) System message inserted into the model's context"
                    }
                },
                "additionalProperties": false,
@ -11836,7 +11845,7 @@
                        "enum": [
                            "model",
                            "shield",
-                            "vector_db",
+                            "vector_store",
                            "dataset",
                            "scoring_function",
                            "benchmark",
@ -12318,7 +12327,7 @@
                        "enum": [
                            "model",
                            "shield",
-                            "vector_db",
+                            "vector_store",
                            "dataset",
                            "scoring_function",
                            "benchmark",
@ -13371,7 +13380,7 @@
                        "enum": [
                            "model",
                            "shield",
-                            "vector_db",
+                            "vector_store",
                            "dataset",
                            "scoring_function",
                            "benchmark",
@ -14918,7 +14927,7 @@
                        "enum": [
                            "model",
                            "shield",
-                            "vector_db",
+                            "vector_store",
                            "dataset",
                            "scoring_function",
                            "benchmark",
@ -16663,7 +16672,7 @@
                        "enum": [
                            "model",
                            "shield",
-                            "vector_db",
+                            "vector_store",
                            "dataset",
                            "scoring_function",
                            "benchmark",
@ -17918,7 +17927,7 @@
        },
        {
            "name": "Inference",
-            "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+            "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.",
            "x-displayName": "Inference"
        },
        {
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@ -6340,7 +6340,7 @@ components:
          enum:
            - model
            - shield
-            - vector_db
+            - vector_store
            - dataset
            - scoring_function
            - benchmark
@ -6382,6 +6382,7 @@ components:
      enum:
        - llm
        - embedding
+        - rerank
      title: ModelType
      description: >-
        Enumeration of supported model types in Llama Stack.
@ -6928,6 +6929,10 @@ components:
          $ref: '#/components/schemas/OpenAIResponseUsage'
          description: >-
            (Optional) Token usage information for the response
+        instructions:
+          type: string
+          description: >-
+            (Optional) System message inserted into the model's context
        input:
          type: array
          items:
@ -7331,6 +7336,10 @@ components:
          $ref: '#/components/schemas/OpenAIResponseUsage'
          description: >-
            (Optional) Token usage information for the response
+        instructions:
+          type: string
+          description: >-
+            (Optional) System message inserted into the model's context
      additionalProperties: false
      required:
        - created_at
@ -9024,7 +9033,7 @@ components:
          enum:
            - model
            - shield
-            - vector_db
+            - vector_store
            - dataset
            - scoring_function
            - benchmark
@ -9332,7 +9341,7 @@ components:
          enum:
            - model
            - shield
-            - vector_db
+            - vector_store
            - dataset
            - scoring_function
            - benchmark
@ -10095,7 +10104,7 @@ components:
          enum:
            - model
            - shield
-            - vector_db
+            - vector_store
            - dataset
            - scoring_function
            - benchmark
@ -11217,7 +11226,7 @@ components:
          enum:
            - model
            - shield
-            - vector_db
+            - vector_store
            - dataset
            - scoring_function
            - benchmark
@ -12544,7 +12553,7 @@ components:
          enum:
            - model
            - shield
-            - vector_db
+            - vector_store
            - dataset
            - scoring_function
            - benchmark
@ -13477,13 +13486,16 @@ tags:
      embeddings.


-      This API provides the raw interface to the underlying models. Two kinds of models
-      are supported:
+      This API provides the raw interface to the underlying models. Three kinds of
+      models are supported:

      - LLM models: these models generate "raw" and "chat" (conversational) completions.

      - Embedding models: these models generate embeddings to be used for semantic
      search.
+
+      - Rerank models: these models reorder the documents based on their relevance
+      to a query.
    x-displayName: Inference
  - name: Inspect
    description: >-