Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-28 04:21:58 +00:00)

Merge branch 'meta-llama:main' into feat/litellm_sambanova_usage

Commit e49bcd46fe: 90 changed files with 3142 additions and 586 deletions
docs/_static/llama-stack-spec.html (vendored), 189 changed lines
@@ -363,6 +363,37 @@
       }
     },
     "/v1/agents": {
+      "get": {
+        "responses": {
+          "200": {
+            "description": "A ListAgentsResponse.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ListAgentsResponse"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Agents"
+        ],
+        "description": "List all agents.",
+        "parameters": []
+      },
       "post": {
         "responses": {
           "200": {
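As an aside, the list endpoint added above can be exercised with a plain HTTP call once a stack server is running. This is only a sketch: the base URL/port and the absence of auth headers are assumptions, not part of this diff; only the path and the ListAgentsResponse shape come from the spec.

```python
import requests

BASE_URL = "http://localhost:8321"  # assumed local Llama Stack server

# GET /v1/agents returns a ListAgentsResponse: {"data": [Agent, ...]}
resp = requests.get(f"{BASE_URL}/v1/agents")
resp.raise_for_status()

for agent in resp.json()["data"]:
    # Each Agent carries agent_id, agent_config and created_at (see the Agent schema later in this diff).
    print(agent["agent_id"], agent["created_at"])
```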
@@ -609,6 +640,47 @@
       }
     },
     "/v1/agents/{agent_id}": {
+      "get": {
+        "responses": {
+          "200": {
+            "description": "An Agent of the agent.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Agent"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Agents"
+        ],
+        "description": "Describe an agent by its ID.",
+        "parameters": [
+          {
+            "name": "agent_id",
+            "in": "path",
+            "description": "ID of the agent.",
+            "required": true,
+            "schema": {
+              "type": "string"
+            }
+          }
+        ]
+      },
       "delete": {
         "responses": {
           "200": {
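Similarly, the describe endpoint added above can be sketched with raw HTTP; the agent ID below is a placeholder and the error handling is illustrative only.

```python
import requests

BASE_URL = "http://localhost:8321"   # assumed local server
agent_id = "your-agent-id"           # placeholder

# GET /v1/agents/{agent_id} returns an Agent on 200, or one of the shared error responses.
resp = requests.get(f"{BASE_URL}/v1/agents/{agent_id}")
if resp.ok:
    agent = resp.json()
    print(agent["agent_config"])
else:
    print("error:", resp.status_code, resp.text)
```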
@@ -2276,6 +2348,49 @@
         ]
       }
     },
+    "/v1/agents/{agent_id}/sessions": {
+      "get": {
+        "responses": {
+          "200": {
+            "description": "A ListAgentSessionsResponse.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ListAgentSessionsResponse"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Agents"
+        ],
+        "description": "List all session(s) of a given agent.",
+        "parameters": [
+          {
+            "name": "agent_id",
+            "in": "path",
+            "description": "The ID of the agent to list sessions for.",
+            "required": true,
+            "schema": {
+              "type": "string"
+            }
+          }
+        ]
+      }
+    },
     "/v1/eval/benchmarks": {
       "get": {
         "responses": {
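The sessions listing added above follows the same pattern; again the base URL and agent ID are placeholders, and only the path and the ListAgentSessionsResponse shape are taken from the spec.

```python
import requests

BASE_URL = "http://localhost:8321"   # assumed
agent_id = "your-agent-id"           # placeholder

# GET /v1/agents/{agent_id}/sessions -> ListAgentSessionsResponse: {"data": [Session, ...]}
resp = requests.get(f"{BASE_URL}/v1/agents/{agent_id}/sessions")
resp.raise_for_status()
sessions = resp.json()["data"]
print(f"agent {agent_id} has {len(sessions)} session(s)")
```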
@@ -6565,6 +6680,28 @@
         "title": "ScoringResult",
         "description": "A scoring result for a single row."
       },
+      "Agent": {
+        "type": "object",
+        "properties": {
+          "agent_id": {
+            "type": "string"
+          },
+          "agent_config": {
+            "$ref": "#/components/schemas/AgentConfig"
+          },
+          "created_at": {
+            "type": "string",
+            "format": "date-time"
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "agent_id",
+          "agent_config",
+          "created_at"
+        ],
+        "title": "Agent"
+      },
       "Session": {
         "type": "object",
         "properties": {
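The Agent schema above maps onto a small Pydantic model, which can be handy when post-processing responses from the new endpoints. This is an illustrative mirror of the schema only; AgentConfig is collapsed to a plain dict here, which is a simplification.

```python
from datetime import datetime
from pydantic import BaseModel


class Agent(BaseModel):
    # Mirrors the "Agent" schema above: all three fields are required.
    agent_id: str
    agent_config: dict      # stands in for the full AgentConfig schema
    created_at: datetime    # "type": "string", "format": "date-time"


agent = Agent(
    agent_id="agent-123",          # illustrative values
    agent_config={},
    created_at="2025-03-01T12:00:00Z",
)
print(agent.created_at.isoformat())
```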
@@ -7907,6 +8044,38 @@
         ],
         "title": "ToolInvocationResult"
       },
+      "ListAgentSessionsResponse": {
+        "type": "object",
+        "properties": {
+          "data": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Session"
+            }
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "data"
+        ],
+        "title": "ListAgentSessionsResponse"
+      },
+      "ListAgentsResponse": {
+        "type": "object",
+        "properties": {
+          "data": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Agent"
+            }
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "data"
+        ],
+        "title": "ListAgentsResponse"
+      },
       "BucketResponse": {
         "type": "object",
         "properties": {
@@ -9321,21 +9490,11 @@
         "type": "object",
         "properties": {
           "tool_responses": {
-            "oneOf": [
-              {
-                "type": "array",
-                "items": {
-                  "$ref": "#/components/schemas/ToolResponse"
-                }
-              },
-              {
-                "type": "array",
-                "items": {
-                  "$ref": "#/components/schemas/ToolResponseMessage"
-                }
-              }
-            ],
-            "description": "The tool call responses to resume the turn with. NOTE: ToolResponseMessage will be deprecated. Use ToolResponse."
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ToolResponse"
+            },
+            "description": "The tool call responses to resume the turn with."
           },
           "stream": {
             "type": "boolean",
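The hunk above narrows `tool_responses` on the resume-turn request from a union of `ToolResponse` / `ToolResponseMessage` arrays to a plain `ToolResponse` array. A hedged sketch of the resulting request body follows; the individual fields inside each entry and the surrounding call are assumptions, since only the `tool_responses` / `stream` shape appears in this hunk.

```python
# Illustrative payload for resuming an agent turn after a client-side tool call.
# Only the "tool_responses is a flat list of ToolResponse objects" part is taken
# from this diff; the field names inside each entry are assumptions.
resume_body = {
    "tool_responses": [
        {
            "call_id": "call-1",
            "tool_name": "get_weather",
            "content": '{"temp_c": 21}',
        }
    ],
    "stream": True,
}
```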
docs/_static/llama-stack-spec.yaml (vendored), 131 changed lines
@@ -238,6 +238,28 @@ paths:
               $ref: '#/components/schemas/CompletionRequest'
         required: true
   /v1/agents:
+    get:
+      responses:
+        '200':
+          description: A ListAgentsResponse.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListAgentsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Agents
+      description: List all agents.
+      parameters: []
     post:
       responses:
         '200':
@@ -410,6 +432,34 @@ paths:
               $ref: '#/components/schemas/CreateUploadSessionRequest'
         required: true
   /v1/agents/{agent_id}:
+    get:
+      responses:
+        '200':
+          description: An Agent of the agent.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Agent'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Agents
+      description: Describe an agent by its ID.
+      parameters:
+        - name: agent_id
+          in: path
+          description: ID of the agent.
+          required: true
+          schema:
+            type: string
     delete:
       responses:
         '200':
@@ -1528,6 +1578,36 @@ paths:
           required: true
           schema:
             type: string
+  /v1/agents/{agent_id}/sessions:
+    get:
+      responses:
+        '200':
+          description: A ListAgentSessionsResponse.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListAgentSessionsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Agents
+      description: List all session(s) of a given agent.
+      parameters:
+        - name: agent_id
+          in: path
+          description: >-
+            The ID of the agent to list sessions for.
+          required: true
+          schema:
+            type: string
   /v1/eval/benchmarks:
     get:
       responses:
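Taken together with the list endpoint earlier in this diff, the YAML above supports a simple client-side sweep over agents and their sessions. As before, this is a sketch: the base URL is an assumption and error handling is omitted.

```python
import requests

BASE_URL = "http://localhost:8321"  # assumed local Llama Stack server

# Combine GET /v1/agents with GET /v1/agents/{agent_id}/sessions.
agents = requests.get(f"{BASE_URL}/v1/agents").json()["data"]
for agent in agents:
    agent_id = agent["agent_id"]
    sessions = requests.get(f"{BASE_URL}/v1/agents/{agent_id}/sessions").json()["data"]
    print(f"{agent_id}: {len(sessions)} session(s)")
```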
@@ -4549,6 +4629,22 @@ components:
         - aggregated_results
       title: ScoringResult
       description: A scoring result for a single row.
+    Agent:
+      type: object
+      properties:
+        agent_id:
+          type: string
+        agent_config:
+          $ref: '#/components/schemas/AgentConfig'
+        created_at:
+          type: string
+          format: date-time
+      additionalProperties: false
+      required:
+        - agent_id
+        - agent_config
+        - created_at
+      title: Agent
     Session:
       type: object
       properties:
@@ -5385,6 +5481,28 @@ components:
       required:
        - content
       title: ToolInvocationResult
+    ListAgentSessionsResponse:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/Session'
+      additionalProperties: false
+      required:
+        - data
+      title: ListAgentSessionsResponse
+    ListAgentsResponse:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/Agent'
+      additionalProperties: false
+      required:
+        - data
+      title: ListAgentsResponse
     BucketResponse:
       type: object
       properties:
@@ -6287,16 +6405,11 @@ components:
       type: object
       properties:
         tool_responses:
-          oneOf:
-            - type: array
-              items:
-                $ref: '#/components/schemas/ToolResponse'
-            - type: array
-              items:
-                $ref: '#/components/schemas/ToolResponseMessage'
+          type: array
+          items:
+            $ref: '#/components/schemas/ToolResponse'
           description: >-
-            The tool call responses to resume the turn with. NOTE: ToolResponseMessage
-            will be deprecated. Use ToolResponse.
+            The tool call responses to resume the turn with.
         stream:
           type: boolean
           description: Whether to stream the response.
@@ -1267,7 +1267,6 @@
    }
   ],
   "source": [
    "# NBVAL_SKIP\n",
    "from pydantic import BaseModel\n",
    "\n",
    "\n",
@@ -1279,7 +1278,7 @@
    "\n",
    "user_input = \"Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003. Extract this information into JSON for me. \"\n",
    "response = client.inference.completion(\n",
-    " model_id=model_id,\n",
+    " model_id=\"meta-llama/Llama-3.1-8B-Instruct\",\n",
    " content=user_input,\n",
    " stream=False,\n",
    " sampling_params={\n",
@@ -1640,7 +1639,7 @@
    "agent = Agent(\n",
    " client, \n",
    " model=model_id,\n",
-    " instructions=\"You are a helpful assistant\",\n",
+    " instructions=\"You are a helpful assistant. Use websearch tool to help answer questions.\",\n",
    " tools=[\"builtin::websearch\"],\n",
    ")\n",
    "user_prompts = [\n",
@@ -1,9 +1 @@
 The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/distribution/server/endpoints.py` using the `generate.py` utility.
-
-Please install the following packages before running the script:
-
-```
-pip install fire PyYAML
-```
-
-Then simply run `sh run_openapi_generator.sh`
@@ -23,9 +23,12 @@ In this example, we will show you how to:

##### Building a Search Agent
```python
from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger

client = LlamaStackClient(base_url=f"http://{HOST}:{PORT}")

agent = Agent(
    client,
    model="meta-llama/Llama-3.3-70B-Instruct",
@@ -33,7 +36,7 @@ agent = Agent(
     tools=["builtin::websearch"],
 )
 user_prompts = [
-    "Which teams played in the NBA western conference finals of 2024. Search the web for the answer.",
+    "Which teams played in the NBA Western Conference Finals of 2024. Search the web for the answer.",
     "In which episode and season of South Park does Bill Cosby (BSM-471) first appear? Give me the number and title. Search the web for the answer.",
     "What is the British-American kickboxer Andrew Tate's kickboxing name? Search the web for the answer.",
 ]
@@ -33,6 +33,8 @@ Can be set to any of the following log levels:

 The default global log level is `info`. `all` sets the log level for all components.

+A user can also set `LLAMA_STACK_LOG_FILE` which will pipe the logs to the specified path as well as to the terminal. An example would be: `export LLAMA_STACK_LOG_FILE=server.log`
+
 ### Llama Stack Build

 In order to build your own distribution, we recommend you clone the `llama-stack` repository.
@@ -40,7 +40,6 @@ The following models are available by default:
- `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
- `accounts/fireworks/models/llama-v3p1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
- `accounts/fireworks/models/llama-v3p1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
- `accounts/fireworks/models/llama-v3p2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
- `accounts/fireworks/models/llama-v3p2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `accounts/fireworks/models/llama-v3p2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
- `accounts/fireworks/models/llama-v3p2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
@@ -23,7 +23,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` |
-| vector_io | `inline::sqlite-vec`, `remote::chromadb`, `remote::pgvector` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |


 You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.
@@ -130,7 +130,7 @@ llama stack run ./run-with-safety.yaml \
 ### (Optional) Update Model Serving Configuration

 ```{note}
-Please check the [model_entries](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
+Please check the [model_entries](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/models.py) for the supported Ollama models.
 ```

 To serve a new model with `ollama`
@@ -1,6 +1,6 @@
 # llama (server-side) CLI Reference

-The `llama` CLI tool helps you setup and use the Llama Stack. It should be available on your path after installing the `llama-stack` package.
+The `llama` CLI tool helps you set up and use the Llama Stack. The CLI is available on your path after installing the `llama-stack` package.

 ## Installation

@@ -27,9 +27,9 @@ You have two ways to install Llama Stack:


 ## `llama` subcommands
-1. `download`: `llama` cli tools supports downloading the model from Meta or Hugging Face.
-2. `model`: Lists available models and their properties.
-3. `stack`: Allows you to build and run a Llama Stack server. You can read more about this [here](../../distributions/building_distro).
+1. `download`: Supports downloading models from Meta or Hugging Face. [Downloading models](#downloading-models)
+2. `model`: Lists available models and their properties. [Understanding models](#understand-the-models)
+3. `stack`: Allows you to build a stack using the `llama stack` distribution and run a Llama Stack server. You can read more about how to build a Llama Stack distribution in the [Build your own Distribution](../../distributions/building_distro) documentation.

 ### Sample Usage

@@ -117,7 +117,7 @@ You should see a table like this:
 +----------------------------------+------------------------------------------+----------------+
 ```

-To download models, you can use the llama download command.
+To download models, you can use the `llama download` command.

 ### Downloading from [Meta](https://llama.meta.com/llama-downloads/)

@@ -191,7 +191,7 @@ You should see a table like this:
 The `llama model` command helps you explore the model’s interface.

 1. `download`: Download the model from different sources. (meta, huggingface)
-2. `list`: Lists all the models available for download with hardware requirements to deploy the models.
+2. `list`: Lists all the models available for download with hardware requirements for deploying the models.
 3. `prompt-format`: Show llama model message formats.
 4. `describe`: Describes all the properties of the model.

@@ -262,13 +262,12 @@ llama model prompt-format -m Llama3.2-3B-Instruct




 You will be shown a Markdown formatted description of the model interface and how prompts / messages are formatted for various scenarios.

 **NOTE**: Outputs in terminal are color printed to show special tokens.

 ### Remove model
-You can run `llama model remove` to remove unecessary model:
+You can run `llama model remove` to remove an unnecessary model:

 ```
 llama model remove -m Llama-Guard-3-8B-int8
@@ -40,7 +40,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
 ollama run llama3.2:3b-instruct-fp16 --keepalive -1m
 ```
 **Note**:
-- The supported models for llama stack for now is listed in [here](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L43)
+- The supported models for llama stack for now is listed in [here](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/models.py)
 - `keepalive -1m` is used so that ollama continues to keep the model in memory indefinitely. Otherwise, ollama frees up memory and you would have to run `ollama run` again.

 ---