Merge remote-tracking branch 'origin/main' into k8s_demo

2025-10-24 08:47:26 +00:00 · 2025-07-29 09:00:45 -07:00 · 2025-07-29 09:00:45 -07:00 · 95d25ddfe2
commit 95d25ddfe2
parent e614241876 870a37ff4b
101 changed files with 3309 additions and 5108 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -9770,7 +9770,7 @@
                            {
                                "type": "array",
                                "items": {
-                                    "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
+                                    "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
                                }
                            }
                        ],
@ -9821,13 +9821,17 @@
                    },
                    {
                        "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
+                    },
+                    {
+                        "$ref": "#/components/schemas/OpenAIFile"
                    }
                ],
                "discriminator": {
                    "propertyName": "type",
                    "mapping": {
                        "text": "#/components/schemas/OpenAIChatCompletionContentPartTextParam",
-                        "image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
+                        "image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam",
+                        "file": "#/components/schemas/OpenAIFile"
                    }
                }
            },
@ -9955,7 +9959,7 @@
                            {
                                "type": "array",
                                "items": {
-                                    "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
+                                    "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
                                }
                            }
                        ],
@ -9974,6 +9978,41 @@
                "title": "OpenAIDeveloperMessageParam",
                "description": "A message from the developer in an OpenAI-compatible chat completion request."
            },
+            "OpenAIFile": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "file",
+                        "default": "file"
+                    },
+                    "file": {
+                        "$ref": "#/components/schemas/OpenAIFileFile"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "file"
+                ],
+                "title": "OpenAIFile"
+            },
+            "OpenAIFileFile": {
+                "type": "object",
+                "properties": {
+                    "file_data": {
+                        "type": "string"
+                    },
+                    "file_id": {
+                        "type": "string"
+                    },
+                    "filename": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "title": "OpenAIFileFile"
+            },
            "OpenAIImageURL": {
                "type": "object",
                "properties": {
@ -10036,7 +10075,7 @@
                            {
                                "type": "array",
                                "items": {
-                                    "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
+                                    "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
                                }
                            }
                        ],
@ -10107,7 +10146,7 @@
                            {
                                "type": "array",
                                "items": {
-                                    "$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
+                                    "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
                                }
                            }
                        ],
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -6895,7 +6895,7 @@ components:
            - type: string
            - type: array
              items:
-                $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
+                $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
          description: The content of the model's response
        name:
          type: string
@ -6934,11 +6934,13 @@ components:
      oneOf:
        - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
        - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+        - $ref: '#/components/schemas/OpenAIFile'
      discriminator:
        propertyName: type
        mapping:
          text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
          image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+          file: '#/components/schemas/OpenAIFile'
    OpenAIChatCompletionContentPartTextParam:
      type: object
      properties:
@ -7037,7 +7039,7 @@ components:
            - type: string
            - type: array
              items:
-                $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
+                $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
          description: The content of the developer message
        name:
          type: string
@ -7050,6 +7052,31 @@ components:
      title: OpenAIDeveloperMessageParam
      description: >-
        A message from the developer in an OpenAI-compatible chat completion request.
+    OpenAIFile:
+      type: object
+      properties:
+        type:
+          type: string
+          const: file
+          default: file
+        file:
+          $ref: '#/components/schemas/OpenAIFileFile'
+      additionalProperties: false
+      required:
+        - type
+        - file
+      title: OpenAIFile
+    OpenAIFileFile:
+      type: object
+      properties:
+        file_data:
+          type: string
+        file_id:
+          type: string
+        filename:
+          type: string
+      additionalProperties: false
+      title: OpenAIFileFile
    OpenAIImageURL:
      type: object
      properties:
@ -7090,7 +7117,7 @@ components:
            - type: string
            - type: array
              items:
-                $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
+                $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
          description: >-
            The content of the "system prompt". If multiple system messages are provided,
            they are concatenated. The underlying Llama Stack code may also add other
@ -7148,7 +7175,7 @@ components:
            - type: string
            - type: array
              items:
-                $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
+                $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
          description: The response content from the tool
      additionalProperties: false
      required:
--- a/docs/quick_start.ipynb
+++ b/docs/quick_start.ipynb
@ -249,12 +249,6 @@
      ],
      "source": [
        "from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n",
-        "import os\n",
-        "\n",
-        "os.environ[\"ENABLE_OLLAMA\"] = \"ollama\"\n",
-        "os.environ[\"OLLAMA_INFERENCE_MODEL\"] = \"llama3.2:3b\"\n",
-        "os.environ[\"OLLAMA_EMBEDDING_MODEL\"] = \"all-minilm:l6-v2\"\n",
-        "os.environ[\"OLLAMA_EMBEDDING_DIMENSION\"] = \"384\"\n",
        "\n",
        "vector_db_id = \"my_demo_vector_db\"\n",
        "client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n",
--- a/docs/source/distributions/importing_as_library.md
+++ b/docs/source/distributions/importing_as_library.md
@ -13,7 +13,7 @@ llama stack build --template starter --image-type venv
 from llama_stack.distribution.library_client import LlamaStackAsLibraryClient

 client = LlamaStackAsLibraryClient(
-    "ollama",
+    "starter",
    # provider_data is optional, but if you need to pass in any provider specific data, you can do so here.
    provider_data={"tavily_search_api_key": os.environ["TAVILY_SEARCH_API_KEY"]},
 )
--- a/docs/source/distributions/self_hosted_distro/nvidia.md
+++ b/docs/source/distributions/self_hosted_distro/nvidia.md
@ -40,16 +40,16 @@ The following environment variables can be configured:

 The following models are available by default:

- `meta/llama3-8b-instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
- `meta/llama3-70b-instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
- `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
- `meta/llama-3.1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
- `meta/llama-3.1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
- `meta/llama-3.2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
- `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
- `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
- `meta/llama-3.3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `meta/llama3-8b-instruct `
+- `meta/llama3-70b-instruct `
+- `meta/llama-3.1-8b-instruct `
+- `meta/llama-3.1-70b-instruct `
+- `meta/llama-3.1-405b-instruct `
+- `meta/llama-3.2-1b-instruct `
+- `meta/llama-3.2-3b-instruct `
+- `meta/llama-3.2-11b-vision-instruct `
+- `meta/llama-3.2-90b-vision-instruct `
+- `meta/llama-3.3-70b-instruct `
 - `nvidia/llama-3.2-nv-embedqa-1b-v2 `
 - `nvidia/nv-embedqa-e5-v5 `
 - `nvidia/nv-embedqa-mistral-7b-v2 `
--- a/docs/source/distributions/self_hosted_distro/starter.md
+++ b/docs/source/distributions/self_hosted_distro/starter.md
@ -158,7 +158,7 @@ export ENABLE_PGVECTOR=__disabled__
 The starter distribution uses several patterns for provider IDs:

 1. **Direct provider IDs**: `faiss`, `ollama`, `vllm`
-2. **Environment-based provider IDs**: `${env.ENABLE_SQLITE_VEC+sqlite-vec}`
+2. **Environment-based provider IDs**: `${env.ENABLE_SQLITE_VEC:+sqlite-vec}`
 3. **Model-based provider IDs**: `${env.OLLAMA_INFERENCE_MODEL:__disabled__}`

When using the `:+` pattern (like `${env.ENABLE_SQLITE_VEC:+sqlite-vec}`), the provider is enabled by default and can be disabled by setting the environment variable to `__disabled__`.
--- a/docs/source/getting_started/detailed_tutorial.md
+++ b/docs/source/getting_started/detailed_tutorial.md
@ -59,7 +59,7 @@ Now let's build and run the Llama Stack config for Ollama.
 We use `starter` as the template. By default all providers are disabled, so Ollama must be enabled by passing environment variables.

 ```bash
-ENABLE_OLLAMA=ollama OLLAMA_INFERENCE_MODEL="llama3.2:3b" llama stack build --template starter --image-type venv --run
+llama stack build --template starter --image-type venv --run
 ```
 :::
 :::{tab-item} Using `conda`
@ -70,7 +70,7 @@ which defines the providers and their settings.
 Now let's build and run the Llama Stack config for Ollama.

 ```bash
-ENABLE_OLLAMA=ollama INFERENCE_MODEL="llama3.2:3b" llama stack build --template starter --image-type conda --run
+llama stack build --template starter --image-type conda --run
 ```
 :::
 :::{tab-item} Using a Container
@ -80,8 +80,6 @@ component that works with different inference providers out of the box. For this
 configurations, please check out [this guide](../distributions/building_distro.md).
 First, let's set up some environment variables and create a local directory to mount into the container’s file system.
 ```bash
-export INFERENCE_MODEL="llama3.2:3b"
-export ENABLE_OLLAMA=ollama
 export LLAMA_STACK_PORT=8321
 mkdir -p ~/.llama
 ```
@ -94,7 +92,6 @@ docker run -it \
  -v ~/.llama:/root/.llama \
  llamastack/distribution-starter \
  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
  --env OLLAMA_URL=http://host.docker.internal:11434
 ```
 Note to start the container with Podman, you can do the same but replace `docker` at the start of the command with
@ -116,7 +113,6 @@ docker run -it \
  --network=host \
  llamastack/distribution-starter \
  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
  --env OLLAMA_URL=http://localhost:11434
 ```
 :::
--- a/docs/source/getting_started/quickstart.md
+++ b/docs/source/getting_started/quickstart.md
@ -19,7 +19,7 @@ ollama run llama3.2:3b --keepalive 60m
 #### Step 2: Run the Llama Stack server
 We will use `uv` to run the Llama Stack server.
 ```bash
-ENABLE_OLLAMA=ollama OLLAMA_INFERENCE_MODEL=llama3.2:3b uv run --with llama-stack llama stack build --template starter --image-type venv --run
+uv run --with llama-stack llama stack build --template starter --image-type venv --run
 ```
 #### Step 3: Run the demo
 Now open up a new terminal and copy the following script into a file named `demo_script.py`.
--- a/docs/source/providers/external.md
+++ b/docs/source/providers/external.md
@ -12,8 +12,7 @@ To enable external providers, you need to add `module` into your build yaml, all
 an example entry in your build.yaml should look like:

 ```
- provider_id: ramalama
-  provider_type: remote::ramalama
+- provider_type: remote::ramalama
  module: ramalama_stack
 ```

@ -255,8 +254,7 @@ distribution_spec:
  container_image: null
  providers:
    inference:
-    - provider_id: ramalama
-      provider_type: remote::ramalama
+    - provider_type: remote::ramalama
      module: ramalama_stack==0.3.0a0
 image_type: venv
 image_name: null
--- a/docs/source/providers/inference/remote_anthropic.md
+++ b/docs/source/providers/inference/remote_anthropic.md
@ -13,7 +13,7 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv
 ## Sample Configuration

 ```yaml
-api_key: ${env.ANTHROPIC_API_KEY}
+api_key: ${env.ANTHROPIC_API_KEY:=}

 ```

--- a/docs/source/providers/inference/remote_cerebras.md
+++ b/docs/source/providers/inference/remote_cerebras.md
@ -15,7 +15,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform.

 ```yaml
 base_url: https://api.cerebras.ai
-api_key: ${env.CEREBRAS_API_KEY}
+api_key: ${env.CEREBRAS_API_KEY:=}

 ```

--- a/docs/source/providers/inference/remote_databricks.md
+++ b/docs/source/providers/inference/remote_databricks.md
@ -14,8 +14,8 @@ Databricks inference provider for running models on Databricks' unified analytic
 ## Sample Configuration

 ```yaml
-url: ${env.DATABRICKS_URL}
-api_token: ${env.DATABRICKS_API_TOKEN}
+url: ${env.DATABRICKS_URL:=}
+api_token: ${env.DATABRICKS_API_TOKEN:=}

 ```

--- a/docs/source/providers/inference/remote_fireworks.md
+++ b/docs/source/providers/inference/remote_fireworks.md
@ -16,7 +16,7 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire

 ```yaml
 url: https://api.fireworks.ai/inference/v1
-api_key: ${env.FIREWORKS_API_KEY}
+api_key: ${env.FIREWORKS_API_KEY:=}

 ```

--- a/docs/source/providers/inference/remote_gemini.md
+++ b/docs/source/providers/inference/remote_gemini.md
@ -13,7 +13,7 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser
 ## Sample Configuration

 ```yaml
-api_key: ${env.GEMINI_API_KEY}
+api_key: ${env.GEMINI_API_KEY:=}

 ```

--- a/docs/source/providers/inference/remote_groq.md
+++ b/docs/source/providers/inference/remote_groq.md
@ -15,7 +15,7 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.

 ```yaml
 url: https://api.groq.com
-api_key: ${env.GROQ_API_KEY}
+api_key: ${env.GROQ_API_KEY:=}

 ```

--- a/docs/source/providers/inference/remote_openai.md
+++ b/docs/source/providers/inference/remote_openai.md
@ -9,11 +9,13 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `api_key` | `str \| None` | No |  | API key for OpenAI models |
+| `base_url` | `str` | No | https://api.openai.com/v1 | Base URL for OpenAI API |

 ## Sample Configuration

 ```yaml
-api_key: ${env.OPENAI_API_KEY}
+api_key: ${env.OPENAI_API_KEY:=}
+base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}

 ```

--- a/docs/source/providers/inference/remote_sambanova-openai-compat.md
+++ b/docs/source/providers/inference/remote_sambanova-openai-compat.md
@ -15,7 +15,7 @@ SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API

 ```yaml
 openai_compat_api_base: https://api.sambanova.ai/v1
-api_key: ${env.SAMBANOVA_API_KEY}
+api_key: ${env.SAMBANOVA_API_KEY:=}

 ```

--- a/docs/source/providers/inference/remote_sambanova.md
+++ b/docs/source/providers/inference/remote_sambanova.md
@ -15,7 +15,7 @@ SambaNova inference provider for running models on SambaNova's dataflow architec

 ```yaml
 url: https://api.sambanova.ai/v1
-api_key: ${env.SAMBANOVA_API_KEY}
+api_key: ${env.SAMBANOVA_API_KEY:=}

 ```

--- a/docs/source/providers/inference/remote_tgi.md
+++ b/docs/source/providers/inference/remote_tgi.md
@ -13,7 +13,7 @@ Text Generation Inference (TGI) provider for HuggingFace model serving.
 ## Sample Configuration

 ```yaml
-url: ${env.TGI_URL}
+url: ${env.TGI_URL:=}

 ```

--- a/docs/source/providers/inference/remote_together.md
+++ b/docs/source/providers/inference/remote_together.md
@ -16,7 +16,7 @@ Together AI inference provider for open-source models and collaborative AI devel

 ```yaml
 url: https://api.together.xyz/v1
-api_key: ${env.TOGETHER_API_KEY}
+api_key: ${env.TOGETHER_API_KEY:=}

 ```

--- a/docs/source/providers/safety/remote_sambanova.md
+++ b/docs/source/providers/safety/remote_sambanova.md
@ -15,7 +15,7 @@ SambaNova's safety provider for content moderation and safety filtering.

 ```yaml
 url: https://api.sambanova.ai/v1
-api_key: ${env.SAMBANOVA_API_KEY}
+api_key: ${env.SAMBANOVA_API_KEY:=}

 ```