Merge remote-tracking branch 'origin/main' into k8s_demo

This commit is contained in:
Kai Wu 2025-07-29 09:00:45 -07:00
commit 95d25ddfe2
101 changed files with 3309 additions and 5108 deletions

View file

@ -9770,7 +9770,7 @@
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
}
}
],
@ -9821,13 +9821,17 @@
},
{
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
},
{
"$ref": "#/components/schemas/OpenAIFile"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"text": "#/components/schemas/OpenAIChatCompletionContentPartTextParam",
"image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
"image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam",
"file": "#/components/schemas/OpenAIFile"
}
}
},
@ -9955,7 +9959,7 @@
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
}
}
],
@ -9974,6 +9978,41 @@
"title": "OpenAIDeveloperMessageParam",
"description": "A message from the developer in an OpenAI-compatible chat completion request."
},
"OpenAIFile": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "file",
"default": "file"
},
"file": {
"$ref": "#/components/schemas/OpenAIFileFile"
}
},
"additionalProperties": false,
"required": [
"type",
"file"
],
"title": "OpenAIFile"
},
"OpenAIFileFile": {
"type": "object",
"properties": {
"file_data": {
"type": "string"
},
"file_id": {
"type": "string"
},
"filename": {
"type": "string"
}
},
"additionalProperties": false,
"title": "OpenAIFileFile"
},
"OpenAIImageURL": {
"type": "object",
"properties": {
@ -10036,7 +10075,7 @@
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
}
}
],
@ -10107,7 +10146,7 @@
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
}
}
],

View file

@ -6895,7 +6895,7 @@ components:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
$ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
description: The content of the model's response
name:
type: string
@ -6934,11 +6934,13 @@ components:
oneOf:
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
- $ref: '#/components/schemas/OpenAIFile'
discriminator:
propertyName: type
mapping:
text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
file: '#/components/schemas/OpenAIFile'
OpenAIChatCompletionContentPartTextParam:
type: object
properties:
@ -7037,7 +7039,7 @@ components:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
$ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
description: The content of the developer message
name:
type: string
@ -7050,6 +7052,31 @@ components:
title: OpenAIDeveloperMessageParam
description: >-
A message from the developer in an OpenAI-compatible chat completion request.
OpenAIFile:
type: object
properties:
type:
type: string
const: file
default: file
file:
$ref: '#/components/schemas/OpenAIFileFile'
additionalProperties: false
required:
- type
- file
title: OpenAIFile
OpenAIFileFile:
type: object
properties:
file_data:
type: string
file_id:
type: string
filename:
type: string
additionalProperties: false
title: OpenAIFileFile
OpenAIImageURL:
type: object
properties:
@ -7090,7 +7117,7 @@ components:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
$ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
description: >-
The content of the "system prompt". If multiple system messages are provided,
they are concatenated. The underlying Llama Stack code may also add other
@ -7148,7 +7175,7 @@ components:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
$ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
description: The response content from the tool
additionalProperties: false
required:

View file

@ -249,12 +249,6 @@
],
"source": [
"from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n",
"import os\n",
"\n",
"os.environ[\"ENABLE_OLLAMA\"] = \"ollama\"\n",
"os.environ[\"OLLAMA_INFERENCE_MODEL\"] = \"llama3.2:3b\"\n",
"os.environ[\"OLLAMA_EMBEDDING_MODEL\"] = \"all-minilm:l6-v2\"\n",
"os.environ[\"OLLAMA_EMBEDDING_DIMENSION\"] = \"384\"\n",
"\n",
"vector_db_id = \"my_demo_vector_db\"\n",
"client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n",

View file

@ -13,7 +13,7 @@ llama stack build --template starter --image-type venv
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
client = LlamaStackAsLibraryClient(
"ollama",
"starter",
# provider_data is optional, but if you need to pass in any provider specific data, you can do so here.
provider_data={"tavily_search_api_key": os.environ["TAVILY_SEARCH_API_KEY"]},
)

View file

@ -40,16 +40,16 @@ The following environment variables can be configured:
The following models are available by default:
- `meta/llama3-8b-instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
- `meta/llama3-70b-instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
- `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
- `meta/llama-3.1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
- `meta/llama-3.1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
- `meta/llama-3.2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
- `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
- `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
- `meta/llama-3.3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
- `meta/llama3-8b-instruct `
- `meta/llama3-70b-instruct `
- `meta/llama-3.1-8b-instruct `
- `meta/llama-3.1-70b-instruct `
- `meta/llama-3.1-405b-instruct `
- `meta/llama-3.2-1b-instruct `
- `meta/llama-3.2-3b-instruct `
- `meta/llama-3.2-11b-vision-instruct `
- `meta/llama-3.2-90b-vision-instruct `
- `meta/llama-3.3-70b-instruct `
- `nvidia/llama-3.2-nv-embedqa-1b-v2 `
- `nvidia/nv-embedqa-e5-v5 `
- `nvidia/nv-embedqa-mistral-7b-v2 `

View file

@ -158,7 +158,7 @@ export ENABLE_PGVECTOR=__disabled__
The starter distribution uses several patterns for provider IDs:
1. **Direct provider IDs**: `faiss`, `ollama`, `vllm`
2. **Environment-based provider IDs**: `${env.ENABLE_SQLITE_VEC+sqlite-vec}`
2. **Environment-based provider IDs**: `${env.ENABLE_SQLITE_VEC:+sqlite-vec}`
3. **Model-based provider IDs**: `${env.OLLAMA_INFERENCE_MODEL:__disabled__}`
When using the `+` pattern (like `${env.ENABLE_SQLITE_VEC+sqlite-vec}`), the provider is enabled by default and can be disabled by setting the environment variable to `__disabled__`.

View file

@ -59,7 +59,7 @@ Now let's build and run the Llama Stack config for Ollama.
We use `starter` as template. By default all providers are disabled, this requires enable ollama by passing environment variables.
```bash
ENABLE_OLLAMA=ollama OLLAMA_INFERENCE_MODEL="llama3.2:3b" llama stack build --template starter --image-type venv --run
llama stack build --template starter --image-type venv --run
```
:::
:::{tab-item} Using `conda`
@ -70,7 +70,7 @@ which defines the providers and their settings.
Now let's build and run the Llama Stack config for Ollama.
```bash
ENABLE_OLLAMA=ollama INFERENCE_MODEL="llama3.2:3b" llama stack build --template starter --image-type conda --run
llama stack build --template starter --image-type conda --run
```
:::
:::{tab-item} Using a Container
@ -80,8 +80,6 @@ component that works with different inference providers out of the box. For this
configurations, please check out [this guide](../distributions/building_distro.md).
First lets setup some environment variables and create a local directory to mount into the containers file system.
```bash
export INFERENCE_MODEL="llama3.2:3b"
export ENABLE_OLLAMA=ollama
export LLAMA_STACK_PORT=8321
mkdir -p ~/.llama
```
@ -94,7 +92,6 @@ docker run -it \
-v ~/.llama:/root/.llama \
llamastack/distribution-starter \
--port $LLAMA_STACK_PORT \
--env INFERENCE_MODEL=$INFERENCE_MODEL \
--env OLLAMA_URL=http://host.docker.internal:11434
```
Note to start the container with Podman, you can do the same but replace `docker` at the start of the command with
@ -116,7 +113,6 @@ docker run -it \
--network=host \
llamastack/distribution-starter \
--port $LLAMA_STACK_PORT \
--env INFERENCE_MODEL=$INFERENCE_MODEL \
--env OLLAMA_URL=http://localhost:11434
```
:::

View file

@ -19,7 +19,7 @@ ollama run llama3.2:3b --keepalive 60m
#### Step 2: Run the Llama Stack server
We will use `uv` to run the Llama Stack server.
```bash
ENABLE_OLLAMA=ollama OLLAMA_INFERENCE_MODEL=llama3.2:3b uv run --with llama-stack llama stack build --template starter --image-type venv --run
uv run --with llama-stack llama stack build --template starter --image-type venv --run
```
#### Step 3: Run the demo
Now open up a new terminal and copy the following script into a file named `demo_script.py`.

View file

@ -12,8 +12,7 @@ To enable external providers, you need to add `module` into your build yaml, all
an example entry in your build.yaml should look like:
```
- provider_id: ramalama
provider_type: remote::ramalama
- provider_type: remote::ramalama
module: ramalama_stack
```
@ -255,8 +254,7 @@ distribution_spec:
container_image: null
providers:
inference:
- provider_id: ramalama
provider_type: remote::ramalama
- provider_type: remote::ramalama
module: ramalama_stack==0.3.0a0
image_type: venv
image_name: null

View file

@ -13,7 +13,7 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv
## Sample Configuration
```yaml
api_key: ${env.ANTHROPIC_API_KEY}
api_key: ${env.ANTHROPIC_API_KEY:=}
```

View file

@ -15,7 +15,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
```yaml
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY}
api_key: ${env.CEREBRAS_API_KEY:=}
```

View file

@ -14,8 +14,8 @@ Databricks inference provider for running models on Databricks' unified analytic
## Sample Configuration
```yaml
url: ${env.DATABRICKS_URL}
api_token: ${env.DATABRICKS_API_TOKEN}
url: ${env.DATABRICKS_URL:=}
api_token: ${env.DATABRICKS_API_TOKEN:=}
```

View file

@ -16,7 +16,7 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire
```yaml
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY}
api_key: ${env.FIREWORKS_API_KEY:=}
```

View file

@ -13,7 +13,7 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser
## Sample Configuration
```yaml
api_key: ${env.GEMINI_API_KEY}
api_key: ${env.GEMINI_API_KEY:=}
```

View file

@ -15,7 +15,7 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.
```yaml
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY}
api_key: ${env.GROQ_API_KEY:=}
```

View file

@ -9,11 +9,13 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | API key for OpenAI models |
| `base_url` | `<class 'str'>` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
## Sample Configuration
```yaml
api_key: ${env.OPENAI_API_KEY}
api_key: ${env.OPENAI_API_KEY:=}
base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
```

View file

@ -15,7 +15,7 @@ SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API
```yaml
openai_compat_api_base: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY}
api_key: ${env.SAMBANOVA_API_KEY:=}
```

View file

@ -15,7 +15,7 @@ SambaNova inference provider for running models on SambaNova's dataflow architec
```yaml
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY}
api_key: ${env.SAMBANOVA_API_KEY:=}
```

View file

@ -13,7 +13,7 @@ Text Generation Inference (TGI) provider for HuggingFace model serving.
## Sample Configuration
```yaml
url: ${env.TGI_URL}
url: ${env.TGI_URL:=}
```

View file

@ -16,7 +16,7 @@ Together AI inference provider for open-source models and collaborative AI devel
```yaml
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY}
api_key: ${env.TOGETHER_API_KEY:=}
```

View file

@ -15,7 +15,7 @@ SambaNova's safety provider for content moderation and safety filtering.
```yaml
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY}
api_key: ${env.SAMBANOVA_API_KEY:=}
```