Merge remote-tracking branch 'origin/main' into openai_v1

2025-10-04 04:04:14 +00:00 · 2025-09-29 13:41:11 -07:00 · 2025-09-29 13:41:11 -07:00 · 35546386a2
commit 35546386a2
parent 90bccbc38e e9eb004bf8
52 changed files with 580 additions and 802 deletions
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@@ -1239,50 +1239,6 @@
                ]
            }
        },
-        "/v1/inference/embeddings": {
-            "post": {
-                "responses": {
-                    "200": {
-                        "description": "An array of embeddings, one for each content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}.",
-                        "content": {
-                            "application/json": {
-                                "schema": {
-                                    "$ref": "#/components/schemas/EmbeddingsResponse"
-                                }
-                            }
-                        }
-                    },
-                    "400": {
-                        "$ref": "#/components/responses/BadRequest400"
-                    },
-                    "429": {
-                        "$ref": "#/components/responses/TooManyRequests429"
-                    },
-                    "500": {
-                        "$ref": "#/components/responses/InternalServerError500"
-                    },
-                    "default": {
-                        "$ref": "#/components/responses/DefaultError"
-                    }
-                },
-                "tags": [
-                    "Inference"
-                ],
-                "summary": "Generate embeddings for content pieces using the specified model.",
-                "description": "Generate embeddings for content pieces using the specified model.",
-                "parameters": [],
-                "requestBody": {
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "$ref": "#/components/schemas/EmbeddingsRequest"
-                            }
-                        }
-                    },
-                    "required": true
-                }
-            }
-        },
        "/v1alpha/eval/benchmarks/{benchmark_id}/evaluations": {
            "post": {
                "responses": {
@@ -6965,7 +6921,7 @@
                }
            }
        },
-        "/v1/inference/rerank": {
+        "/v1alpha/inference/rerank": {
            "post": {
                "responses": {
                    "200": {
@@ -12081,80 +12037,6 @@
                "title": "OpenAIDeleteResponseObject",
                "description": "Response object confirming deletion of an OpenAI response."
            },
-            "EmbeddingsRequest": {
-                "type": "object",
-                "properties": {
-                    "model_id": {
-                        "type": "string",
-                        "description": "The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint."
-                    },
-                    "contents": {
-                        "oneOf": [
-                            {
-                                "type": "array",
-                                "items": {
-                                    "type": "string"
-                                }
-                            },
-                            {
-                                "type": "array",
-                                "items": {
-                                    "$ref": "#/components/schemas/InterleavedContentItem"
-                                }
-                            }
-                        ],
-                        "description": "List of contents to generate embeddings for. Each content can be a string or an InterleavedContentItem (and hence can be multimodal). The behavior depends on the model and provider. Some models may only support text."
-                    },
-                    "text_truncation": {
-                        "type": "string",
-                        "enum": [
-                            "none",
-                            "start",
-                            "end"
-                        ],
-                        "description": "(Optional) Config for how to truncate text for embedding when text is longer than the model's max sequence length."
-                    },
-                    "output_dimension": {
-                        "type": "integer",
-                        "description": "(Optional) Output dimensionality for the embeddings. Only supported by Matryoshka models."
-                    },
-                    "task_type": {
-                        "type": "string",
-                        "enum": [
-                            "query",
-                            "document"
-                        ],
-                        "description": "(Optional) How is the embedding being used? This is only supported by asymmetric embedding models."
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "model_id",
-                    "contents"
-                ],
-                "title": "EmbeddingsRequest"
-            },
-            "EmbeddingsResponse": {
-                "type": "object",
-                "properties": {
-                    "embeddings": {
-                        "type": "array",
-                        "items": {
-                            "type": "array",
-                            "items": {
-                                "type": "number"
-                            }
-                        },
-                        "description": "List of embedding vectors, one per input content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "embeddings"
-                ],
-                "title": "EmbeddingsResponse",
-                "description": "Response containing generated embeddings."
-            },
            "AgentCandidate": {
                "type": "object",
                "properties": {