Merge branch 'main' into add-mcp-authentication-param

2025-12-03 09:53:45 +00:00 · 2025-11-04 16:20:38 -08:00 · 2025-11-04 16:20:38 -08:00 · 8632c705aa
commit 8632c705aa
parent 5c5f6f7e65 392e01dc79
1250 changed files with 2278 additions and 343484 deletions
--- a/docs/docs/concepts/apis/index.mdx
+++ b/docs/docs/concepts/apis/index.mdx
@ -23,5 +23,4 @@ A Llama Stack API is described as a collection of REST endpoints. We currently s
 We are working on adding a few more APIs to complete the application lifecycle. These will include:
 - **Batch Inference**: run inference on a dataset of inputs
 - **Batch Agents**: run agents on a dataset of inputs
- **Synthetic Data Generation**: generate synthetic data for model development
 - **Batches**: OpenAI-compatible batch management for inference
--- a/docs/docs/distributions/k8s/ui-k8s.yaml.template
+++ b/docs/docs/distributions/k8s/ui-k8s.yaml.template
@ -44,7 +44,7 @@ spec:

            # Navigate to the UI directory
            echo "Navigating to UI directory..."
-            cd /app/llama_stack/ui
+            cd /app/llama_stack_ui

            # Check if package.json exists
            if [ ! -f "package.json" ]; then
--- a/docs/docs/getting_started/detailed_tutorial.mdx
+++ b/docs/docs/getting_started/detailed_tutorial.mdx
@ -239,8 +239,13 @@ client = LlamaStackClient(base_url="http://localhost:8321")
 models = client.models.list()

 # Select the first LLM
-llm = next(m for m in models if m.model_type == "llm" and m.provider_id == "ollama")
-model_id = llm.identifier
+llm = next(
+    m for m in models
+    if m.custom_metadata
+    and m.custom_metadata.get("model_type") == "llm"
+    and m.custom_metadata.get("provider_id") == "ollama"
+)
+model_id = llm.id

 print("Model:", model_id)

@ -279,8 +284,13 @@ import uuid
 client = LlamaStackClient(base_url=f"http://localhost:8321")

 models = client.models.list()
-llm = next(m for m in models if m.model_type == "llm" and m.provider_id == "ollama")
-model_id = llm.identifier
+llm = next(
+    m for m in models
+    if m.custom_metadata
+    and m.custom_metadata.get("model_type") == "llm"
+    and m.custom_metadata.get("provider_id") == "ollama"
+)
+model_id = llm.id

 agent = Agent(client, model=model_id, instructions="You are a helpful assistant.")

@ -450,8 +460,11 @@ import uuid
 client = LlamaStackClient(base_url="http://localhost:8321")

 # Create a vector database instance
-embed_lm = next(m for m in client.models.list() if m.model_type == "embedding")
-embedding_model = embed_lm.identifier
+embed_lm = next(
+    m for m in client.models.list()
+    if m.custom_metadata and m.custom_metadata.get("model_type") == "embedding"
+)
+embedding_model = embed_lm.id
 vector_db_id = f"v{uuid.uuid4().hex}"
 # The VectorDB API is deprecated; the server now returns its own authoritative ID.
 # We capture the correct ID from the response's .identifier attribute.
@ -489,9 +502,11 @@ client.tool_runtime.rag_tool.insert(
 llm = next(
    m
    for m in client.models.list()
-    if m.model_type == "llm" and m.provider_id == "ollama"
+    if m.custom_metadata
+    and m.custom_metadata.get("model_type") == "llm"
+    and m.custom_metadata.get("provider_id") == "ollama"
 )
-model = llm.identifier
+model = llm.id

 # Create the RAG agent
 rag_agent = Agent(
--- a/docs/openapi_generator/pyopenapi/operations.py
+++ b/docs/openapi_generator/pyopenapi/operations.py
@ -170,7 +170,7 @@ def _get_endpoint_functions(
        for webmethod in webmethods:
            print(f"Processing {colored(func_name, 'white')}...")
            operation_name = func_name
-            
+
            if webmethod.method == "GET":
                prefix = "get"
            elif webmethod.method == "DELETE":
@ -196,16 +196,10 @@ def _get_endpoint_functions(
 def _get_defining_class(member_fn: str, derived_cls: type) -> type:
    "Find the class in which a member function is first defined in a class inheritance hierarchy."

-    # This import must be dynamic here
-    from llama_stack.apis.tools import RAGToolRuntime, ToolRuntime
-
    # iterate in reverse method resolution order (base classes first) so the class that first defines the member is found
    for cls in reversed(inspect.getmro(derived_cls)):
        for name, _ in inspect.getmembers(cls, inspect.isfunction):
            if name == member_fn:
-                # HACK ALERT
-                if cls == RAGToolRuntime:
-                    return ToolRuntime
                return cls

    raise ValidationError(
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
--- a/docs/static/experimental-llama-stack-spec.yaml
+++ b/docs/static/experimental-llama-stack-spec.yaml
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@ -974,11 +974,11 @@ paths:
    get:
      responses:
        '200':
-          description: A ListModelsResponse.
+          description: An OpenAIListModelsResponse.
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/ListModelsResponse'
+                $ref: '#/components/schemas/OpenAIListModelsResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@ -991,8 +991,8 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Models
-      summary: List all models.
-      description: List all models.
+      summary: List models using the OpenAI API.
+      description: List models using the OpenAI API.
      parameters: []
      deprecated: false
    post:
@ -1982,40 +1982,6 @@ paths:
          schema:
            type: string
      deprecated: false
-  /v1/synthetic-data-generation/generate:
-    post:
-      responses:
-        '200':
-          description: >-
-            Response containing filtered synthetic data samples and optional statistics
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/SyntheticDataGenerationResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - SyntheticDataGeneration (Coming Soon)
-      summary: >-
-        Generate synthetic data based on input dialogs and apply filtering.
-      description: >-
-        Generate synthetic data based on input dialogs and apply filtering.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/SyntheticDataGenerateRequest'
-        required: true
-      deprecated: false
  /v1/tool-runtime/invoke:
    post:
      responses:
@ -2086,69 +2052,6 @@ paths:
          schema:
            $ref: '#/components/schemas/URL'
      deprecated: false
-  /v1/tool-runtime/rag-tool/insert:
-    post:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolRuntime
-      summary: >-
-        Index documents so they can be used by the RAG system.
-      description: >-
-        Index documents so they can be used by the RAG system.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/InsertRequest'
-        required: true
-      deprecated: false
-  /v1/tool-runtime/rag-tool/query:
-    post:
-      responses:
-        '200':
-          description: >-
-            RAGQueryResult containing the retrieved content and metadata
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/RAGQueryResult'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolRuntime
-      summary: >-
-        Query the RAG system for context; typically invoked by the agent.
-      description: >-
-        Query the RAG system for context; typically invoked by the agent.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/QueryRequest'
-        required: true
-      deprecated: false
  /v1/toolgroups:
    get:
      responses:
@ -5619,6 +5522,88 @@ components:
      title: ListRoutesResponse
      description: >-
        Response containing a list of all available API routes.
+    OpenAIModel:
+      type: object
+      properties:
+        id:
+          type: string
+        object:
+          type: string
+          const: model
+          default: model
+        created:
+          type: integer
+        owned_by:
+          type: string
+        custom_metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+      additionalProperties: false
+      required:
+        - id
+        - object
+        - created
+        - owned_by
+      title: OpenAIModel
+      description: A model from OpenAI.
+    OpenAIListModelsResponse:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIModel'
+      additionalProperties: false
+      required:
+        - data
+      title: OpenAIListModelsResponse
+    ModelType:
+      type: string
+      enum:
+        - llm
+        - embedding
+        - rerank
+      title: ModelType
+      description: >-
+        Enumeration of supported model types in Llama Stack.
+    RegisterModelRequest:
+      type: object
+      properties:
+        model_id:
+          type: string
+          description: The identifier of the model to register.
+        provider_model_id:
+          type: string
+          description: >-
+            The identifier of the model in the provider.
+        provider_id:
+          type: string
+          description: The identifier of the provider.
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: Any additional metadata for this model.
+        model_type:
+          $ref: '#/components/schemas/ModelType'
+          description: The type of model to register.
+      additionalProperties: false
+      required:
+        - model_id
+      title: RegisterModelRequest
    Model:
      type: object
      properties:
@ -5676,57 +5661,6 @@ components:
      title: Model
      description: >-
        A model resource representing an AI model registered in Llama Stack.
-    ModelType:
-      type: string
-      enum:
-        - llm
-        - embedding
-        - rerank
-      title: ModelType
-      description: >-
-        Enumeration of supported model types in Llama Stack.
-    ListModelsResponse:
-      type: object
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/Model'
-      additionalProperties: false
-      required:
-        - data
-      title: ListModelsResponse
-    RegisterModelRequest:
-      type: object
-      properties:
-        model_id:
-          type: string
-          description: The identifier of the model to register.
-        provider_model_id:
-          type: string
-          description: >-
-            The identifier of the model in the provider.
-        provider_id:
-          type: string
-          description: The identifier of the provider.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Any additional metadata for this model.
-        model_type:
-          $ref: '#/components/schemas/ModelType'
-          description: The type of model to register.
-      additionalProperties: false
-      required:
-        - model_id
-      title: RegisterModelRequest
    RunModerationRequest:
      type: object
      properties:
@ -8144,20 +8078,6 @@ components:
        - error
      title: ViolationLevel
      description: Severity level of a safety violation.
-    AgentTurnInputType:
-      type: object
-      properties:
-        type:
-          type: string
-          const: agent_turn_input
-          default: agent_turn_input
-          description: >-
-            Discriminator type. Always "agent_turn_input"
-      additionalProperties: false
-      required:
-        - type
-      title: AgentTurnInputType
-      description: Parameter type for agent turn input.
    AggregationFunctionType:
      type: string
      enum:
@ -8400,7 +8320,6 @@ components:
            - $ref: '#/components/schemas/UnionType'
            - $ref: '#/components/schemas/ChatCompletionInputType'
            - $ref: '#/components/schemas/CompletionInputType'
-            - $ref: '#/components/schemas/AgentTurnInputType'
          discriminator:
            propertyName: type
            mapping:
@ -8413,7 +8332,6 @@ components:
              union: '#/components/schemas/UnionType'
              chat_completion_input: '#/components/schemas/ChatCompletionInputType'
              completion_input: '#/components/schemas/CompletionInputType'
-              agent_turn_input: '#/components/schemas/AgentTurnInputType'
        params:
          $ref: '#/components/schemas/ScoringFnParams'
      additionalProperties: false
@ -8494,7 +8412,6 @@ components:
        - $ref: '#/components/schemas/UnionType'
        - $ref: '#/components/schemas/ChatCompletionInputType'
        - $ref: '#/components/schemas/CompletionInputType'
-        - $ref: '#/components/schemas/AgentTurnInputType'
      discriminator:
        propertyName: type
        mapping:
@ -8507,7 +8424,6 @@ components:
          union: '#/components/schemas/UnionType'
          chat_completion_input: '#/components/schemas/ChatCompletionInputType'
          completion_input: '#/components/schemas/CompletionInputType'
-          agent_turn_input: '#/components/schemas/AgentTurnInputType'
    RegisterScoringFunctionRequest:
      type: object
      properties:
@ -8744,45 +8660,29 @@ components:
      required:
        - shield_id
      title: RegisterShieldRequest
-    CompletionMessage:
+    InvokeToolRequest:
      type: object
      properties:
-        role:
+        tool_name:
          type: string
-          const: assistant
-          default: assistant
+          description: The name of the tool to invoke.
+        kwargs:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
          description: >-
-            Must be "assistant" to identify this as the model's response
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: The content of the model's response
-        stop_reason:
-          type: string
-          enum:
-            - end_of_turn
-            - end_of_message
-            - out_of_tokens
-          description: >-
-            Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`:
-            The model finished generating the entire response. - `StopReason.end_of_message`:
-            The model finished generating but generated a partial response -- usually,
-            a tool call. The user may call the tool and continue the conversation
-            with the tool's response. - `StopReason.out_of_tokens`: The model ran
-            out of token budget.
-        tool_calls:
-          type: array
-          items:
-            $ref: '#/components/schemas/ToolCall'
-          description: >-
-            List of tool calls. Each tool call is a ToolCall object.
+            A dictionary of arguments to pass to the tool.
      additionalProperties: false
      required:
-        - role
-        - content
-        - stop_reason
-      title: CompletionMessage
-      description: >-
-        A message containing the model's (assistant) response in a chat conversation.
+        - tool_name
+        - kwargs
+      title: InvokeToolRequest
    ImageContentItem:
      type: object
      properties:
@ -8829,41 +8729,6 @@ components:
        mapping:
          image: '#/components/schemas/ImageContentItem'
          text: '#/components/schemas/TextContentItem'
-    Message:
-      oneOf:
-        - $ref: '#/components/schemas/UserMessage'
-        - $ref: '#/components/schemas/SystemMessage'
-        - $ref: '#/components/schemas/ToolResponseMessage'
-        - $ref: '#/components/schemas/CompletionMessage'
-      discriminator:
-        propertyName: role
-        mapping:
-          user: '#/components/schemas/UserMessage'
-          system: '#/components/schemas/SystemMessage'
-          tool: '#/components/schemas/ToolResponseMessage'
-          assistant: '#/components/schemas/CompletionMessage'
-    SystemMessage:
-      type: object
-      properties:
-        role:
-          type: string
-          const: system
-          default: system
-          description: >-
-            Must be "system" to identify this as a system message
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            The content of the "system prompt". If multiple system messages are provided,
-            they are concatenated. The underlying Llama Stack code may also add other
-            system messages (for example, for formatting tool definitions).
-      additionalProperties: false
-      required:
-        - role
-        - content
-      title: SystemMessage
-      description: >-
-        A system message providing instructions or context to the model.
    TextContentItem:
      type: object
      properties:
@ -8882,179 +8747,6 @@ components:
        - text
      title: TextContentItem
      description: A text content item
-    ToolCall:
-      type: object
-      properties:
-        call_id:
-          type: string
-        tool_name:
-          oneOf:
-            - type: string
-              enum:
-                - brave_search
-                - wolfram_alpha
-                - photogen
-                - code_interpreter
-              title: BuiltinTool
-            - type: string
-        arguments:
-          type: string
-      additionalProperties: false
-      required:
-        - call_id
-        - tool_name
-        - arguments
-      title: ToolCall
-    ToolResponseMessage:
-      type: object
-      properties:
-        role:
-          type: string
-          const: tool
-          default: tool
-          description: >-
-            Must be "tool" to identify this as a tool response
-        call_id:
-          type: string
-          description: >-
-            Unique identifier for the tool call this response is for
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: The response content from the tool
-      additionalProperties: false
-      required:
-        - role
-        - call_id
-        - content
-      title: ToolResponseMessage
-      description: >-
-        A message representing the result of a tool invocation.
-    URL:
-      type: object
-      properties:
-        uri:
-          type: string
-          description: The URL string pointing to the resource
-      additionalProperties: false
-      required:
-        - uri
-      title: URL
-      description: A URL reference to external content.
-    UserMessage:
-      type: object
-      properties:
-        role:
-          type: string
-          const: user
-          default: user
-          description: >-
-            Must be "user" to identify this as a user message
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            The content of the message, which can include text and other media
-        context:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            (Optional) This field is used internally by Llama Stack to pass RAG context.
-            This field may be removed in the API in the future.
-      additionalProperties: false
-      required:
-        - role
-        - content
-      title: UserMessage
-      description: >-
-        A message from the user in a chat conversation.
-    SyntheticDataGenerateRequest:
-      type: object
-      properties:
-        dialogs:
-          type: array
-          items:
-            $ref: '#/components/schemas/Message'
-          description: >-
-            List of conversation messages to use as input for synthetic data generation
-        filtering_function:
-          type: string
-          enum:
-            - none
-            - random
-            - top_k
-            - top_p
-            - top_k_top_p
-            - sigmoid
-          description: >-
-            Type of filtering to apply to generated synthetic data samples
-        model:
-          type: string
-          description: >-
-            (Optional) The identifier of the model to use. The model must be registered
-            with Llama Stack and available via the /models endpoint
-      additionalProperties: false
-      required:
-        - dialogs
-        - filtering_function
-      title: SyntheticDataGenerateRequest
-    SyntheticDataGenerationResponse:
-      type: object
-      properties:
-        synthetic_data:
-          type: array
-          items:
-            type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: >-
-            List of generated synthetic data samples that passed the filtering criteria
-        statistics:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Statistical information about the generation process and filtering
-            results
-      additionalProperties: false
-      required:
-        - synthetic_data
-      title: SyntheticDataGenerationResponse
-      description: >-
-        Response from the synthetic data generation. Batch of (prompt, response, score)
-        tuples that pass the threshold.
-    InvokeToolRequest:
-      type: object
-      properties:
-        tool_name:
-          type: string
-          description: The name of the tool to invoke.
-        kwargs:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            A dictionary of arguments to pass to the tool.
-      additionalProperties: false
-      required:
-        - tool_name
-        - kwargs
-      title: InvokeToolRequest
    ToolInvocationResult:
      type: object
      properties:
@ -9085,6 +8777,17 @@ components:
      additionalProperties: false
      title: ToolInvocationResult
      description: Result of a tool invocation.
+    URL:
+      type: object
+      properties:
+        uri:
+          type: string
+          description: The URL string pointing to the resource
+      additionalProperties: false
+      required:
+        - uri
+      title: URL
+      description: A URL reference to external content.
    ToolDef:
      type: object
      properties:
@ -9155,274 +8858,6 @@ components:
      title: ListToolDefsResponse
      description: >-
        Response containing a list of tool definitions.
-    RAGDocument:
-      type: object
-      properties:
-        document_id:
-          type: string
-          description: The unique identifier for the document.
-        content:
-          oneOf:
-            - type: string
-            - $ref: '#/components/schemas/InterleavedContentItem'
-            - type: array
-              items:
-                $ref: '#/components/schemas/InterleavedContentItem'
-            - $ref: '#/components/schemas/URL'
-          description: The content of the document.
-        mime_type:
-          type: string
-          description: The MIME type of the document.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Additional metadata for the document.
-      additionalProperties: false
-      required:
-        - document_id
-        - content
-        - metadata
-      title: RAGDocument
-      description: >-
-        A document to be used for document ingestion in the RAG Tool.
-    InsertRequest:
-      type: object
-      properties:
-        documents:
-          type: array
-          items:
-            $ref: '#/components/schemas/RAGDocument'
-          description: >-
-            List of documents to index in the RAG system
-        vector_store_id:
-          type: string
-          description: >-
-            ID of the vector database to store the document embeddings
-        chunk_size_in_tokens:
-          type: integer
-          description: >-
-            (Optional) Size in tokens for document chunking during indexing
-      additionalProperties: false
-      required:
-        - documents
-        - vector_store_id
-        - chunk_size_in_tokens
-      title: InsertRequest
-    DefaultRAGQueryGeneratorConfig:
-      type: object
-      properties:
-        type:
-          type: string
-          const: default
-          default: default
-          description: >-
-            Type of query generator, always 'default'
-        separator:
-          type: string
-          default: ' '
-          description: >-
-            String separator used to join query terms
-      additionalProperties: false
-      required:
-        - type
-        - separator
-      title: DefaultRAGQueryGeneratorConfig
-      description: >-
-        Configuration for the default RAG query generator.
-    LLMRAGQueryGeneratorConfig:
-      type: object
-      properties:
-        type:
-          type: string
-          const: llm
-          default: llm
-          description: Type of query generator, always 'llm'
-        model:
-          type: string
-          description: >-
-            Name of the language model to use for query generation
-        template:
-          type: string
-          description: >-
-            Template string for formatting the query generation prompt
-      additionalProperties: false
-      required:
-        - type
-        - model
-        - template
-      title: LLMRAGQueryGeneratorConfig
-      description: >-
-        Configuration for the LLM-based RAG query generator.
-    RAGQueryConfig:
-      type: object
-      properties:
-        query_generator_config:
-          oneOf:
-            - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
-            - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
-          discriminator:
-            propertyName: type
-            mapping:
-              default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
-              llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
-          description: Configuration for the query generator.
-        max_tokens_in_context:
-          type: integer
-          default: 4096
-          description: Maximum number of tokens in the context.
-        max_chunks:
-          type: integer
-          default: 5
-          description: Maximum number of chunks to retrieve.
-        chunk_template:
-          type: string
-          default: >
-            Result {index}
-
-            Content: {chunk.content}
-
-            Metadata: {metadata}
-          description: >-
-            Template for formatting each retrieved chunk in the context. Available
-            placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk
-            content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
-            {chunk.content}\nMetadata: {metadata}\n"
-        mode:
-          $ref: '#/components/schemas/RAGSearchMode'
-          default: vector
-          description: >-
-            Search mode for retrieval—either "vector", "keyword", or "hybrid". Default
-            "vector".
-        ranker:
-          $ref: '#/components/schemas/Ranker'
-          description: >-
-            Configuration for the ranker to use in hybrid search. Defaults to RRF
-            ranker.
-      additionalProperties: false
-      required:
-        - query_generator_config
-        - max_tokens_in_context
-        - max_chunks
-        - chunk_template
-      title: RAGQueryConfig
-      description: >-
-        Configuration for the RAG query generation.
-    RAGSearchMode:
-      type: string
-      enum:
-        - vector
-        - keyword
-        - hybrid
-      title: RAGSearchMode
-      description: >-
-        Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search
-        for semantic matching - KEYWORD: Uses keyword-based search for exact matching
-        - HYBRID: Combines both vector and keyword search for better results
-    RRFRanker:
-      type: object
-      properties:
-        type:
-          type: string
-          const: rrf
-          default: rrf
-          description: The type of ranker, always "rrf"
-        impact_factor:
-          type: number
-          default: 60.0
-          description: >-
-            The impact factor for RRF scoring. Higher values give more weight to higher-ranked
-            results. Must be greater than 0
-      additionalProperties: false
-      required:
-        - type
-        - impact_factor
-      title: RRFRanker
-      description: >-
-        Reciprocal Rank Fusion (RRF) ranker configuration.
-    Ranker:
-      oneOf:
-        - $ref: '#/components/schemas/RRFRanker'
-        - $ref: '#/components/schemas/WeightedRanker'
-      discriminator:
-        propertyName: type
-        mapping:
-          rrf: '#/components/schemas/RRFRanker'
-          weighted: '#/components/schemas/WeightedRanker'
-    WeightedRanker:
-      type: object
-      properties:
-        type:
-          type: string
-          const: weighted
-          default: weighted
-          description: The type of ranker, always "weighted"
-        alpha:
-          type: number
-          default: 0.5
-          description: >-
-            Weight factor between 0 and 1. 0 means only use keyword scores, 1 means
-            only use vector scores, values in between blend both scores.
-      additionalProperties: false
-      required:
-        - type
-        - alpha
-      title: WeightedRanker
-      description: >-
-        Weighted ranker configuration that combines vector and keyword scores.
-    QueryRequest:
-      type: object
-      properties:
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            The query content to search for in the indexed documents
-        vector_store_ids:
-          type: array
-          items:
-            type: string
-          description: >-
-            List of vector database IDs to search within
-        query_config:
-          $ref: '#/components/schemas/RAGQueryConfig'
-          description: >-
-            (Optional) Configuration parameters for the query operation
-      additionalProperties: false
-      required:
-        - content
-        - vector_store_ids
-      title: QueryRequest
-    RAGQueryResult:
-      type: object
-      properties:
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            (Optional) The retrieved content from the query
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Additional metadata about the query result
-      additionalProperties: false
-      required:
-        - metadata
-      title: RAGQueryResult
-      description: >-
-        Result of a RAG query containing retrieved content and metadata.
    ToolGroup:
      type: object
      properties:
@ -10686,8 +10121,6 @@ tags:
    description: ''
  - name: Shields
    description: ''
-  - name: SyntheticDataGeneration (Coming Soon)
-    description: ''
  - name: ToolGroups
    description: ''
  - name: ToolRuntime
@ -10710,7 +10143,6 @@ x-tagGroups:
      - Scoring
      - ScoringFunctions
      - Shields
-      - SyntheticDataGeneration (Coming Soon)
      - ToolGroups
      - ToolRuntime
      - VectorIO
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml