Merge branch 'meta-llama:main' into feat/litellm_sambanova_usage

Jorge Piedrahita Ortiz 2025-03-12 15:12:42 -05:00 committed by GitHub
commit e49bcd46fe
90 changed files with 3142 additions and 586 deletions


@@ -363,6 +363,37 @@
}
},
"/v1/agents": {
"get": {
"responses": {
"200": {
"description": "A ListAgentsResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListAgentsResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "List all agents.",
"parameters": []
},
"post": {
"responses": {
"200": {
@@ -609,6 +640,47 @@
}
},
"/v1/agents/{agent_id}": {
"get": {
"responses": {
"200": {
"description": "An Agent of the agent.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Agent"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "Describe an agent by its ID.",
"parameters": [
{
"name": "agent_id",
"in": "path",
"description": "ID of the agent.",
"required": true,
"schema": {
"type": "string"
}
}
]
},
"delete": {
"responses": {
"200": {
@@ -2276,6 +2348,49 @@
]
}
},
"/v1/agents/{agent_id}/sessions": {
"get": {
"responses": {
"200": {
"description": "A ListAgentSessionsResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListAgentSessionsResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "List all session(s) of a given agent.",
"parameters": [
{
"name": "agent_id",
"in": "path",
"description": "The ID of the agent to list sessions for.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/eval/benchmarks": {
"get": {
"responses": {
@@ -6565,6 +6680,28 @@
"title": "ScoringResult",
"description": "A scoring result for a single row."
},
"Agent": {
"type": "object",
"properties": {
"agent_id": {
"type": "string"
},
"agent_config": {
"$ref": "#/components/schemas/AgentConfig"
},
"created_at": {
"type": "string",
"format": "date-time"
}
},
"additionalProperties": false,
"required": [
"agent_id",
"agent_config",
"created_at"
],
"title": "Agent"
},
"Session": {
"type": "object",
"properties": {
@@ -7907,6 +8044,38 @@
],
"title": "ToolInvocationResult"
},
"ListAgentSessionsResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Session"
}
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "ListAgentSessionsResponse"
},
"ListAgentsResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Agent"
}
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "ListAgentsResponse"
},
"BucketResponse": {
"type": "object",
"properties": {
@@ -9321,21 +9490,11 @@
       "type": "object",
       "properties": {
         "tool_responses": {
-          "oneOf": [
-            {
-              "type": "array",
-              "items": {
-                "$ref": "#/components/schemas/ToolResponse"
-              }
-            },
-            {
-              "type": "array",
-              "items": {
-                "$ref": "#/components/schemas/ToolResponseMessage"
-              }
-            }
-          ],
-          "description": "The tool call responses to resume the turn with. NOTE: ToolResponseMessage will be deprecated. Use ToolResponse."
+          "type": "array",
+          "items": {
+            "$ref": "#/components/schemas/ToolResponse"
+          },
+          "description": "The tool call responses to resume the turn with."
         },
         "stream": {
           "type": "boolean",

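For reference, a minimal sketch of exercising the new read-only Agents routes with plain HTTP. The paths and response shapes are taken from the spec diff above; the base URL, port, and use of `requests` are illustrative assumptions:

```python
import requests

BASE_URL = "http://localhost:8321"  # assumed local server address

# GET /v1/agents -> ListAgentsResponse: {"data": [Agent, ...]}
agents = requests.get(f"{BASE_URL}/v1/agents").json()["data"]

for agent in agents:
    # Each Agent carries agent_id, agent_config, and a created_at timestamp.
    agent_id = agent["agent_id"]

    # GET /v1/agents/{agent_id} -> a single Agent object
    detail = requests.get(f"{BASE_URL}/v1/agents/{agent_id}").json()
    print(detail["agent_id"], detail["created_at"])

    # GET /v1/agents/{agent_id}/sessions -> ListAgentSessionsResponse
    sessions = requests.get(f"{BASE_URL}/v1/agents/{agent_id}/sessions").json()["data"]
    print(f"  {len(sessions)} session(s)")
```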

@@ -238,6 +238,28 @@ paths:
$ref: '#/components/schemas/CompletionRequest'
required: true
/v1/agents:
get:
responses:
'200':
description: A ListAgentsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/ListAgentsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: List all agents.
parameters: []
post:
responses:
'200':
@@ -410,6 +432,34 @@ paths:
$ref: '#/components/schemas/CreateUploadSessionRequest'
required: true
/v1/agents/{agent_id}:
get:
responses:
'200':
description: An Agent of the agent.
content:
application/json:
schema:
$ref: '#/components/schemas/Agent'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: Describe an agent by its ID.
parameters:
- name: agent_id
in: path
description: ID of the agent.
required: true
schema:
type: string
delete:
responses:
'200':
@@ -1528,6 +1578,36 @@ paths:
required: true
schema:
type: string
/v1/agents/{agent_id}/sessions:
get:
responses:
'200':
description: A ListAgentSessionsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/ListAgentSessionsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: List all session(s) of a given agent.
parameters:
- name: agent_id
in: path
description: >-
The ID of the agent to list sessions for.
required: true
schema:
type: string
/v1/eval/benchmarks:
get:
responses:
@@ -4549,6 +4629,22 @@ components:
- aggregated_results
title: ScoringResult
description: A scoring result for a single row.
Agent:
type: object
properties:
agent_id:
type: string
agent_config:
$ref: '#/components/schemas/AgentConfig'
created_at:
type: string
format: date-time
additionalProperties: false
required:
- agent_id
- agent_config
- created_at
title: Agent
Session:
type: object
properties:
@@ -5385,6 +5481,28 @@ components:
required:
- content
title: ToolInvocationResult
ListAgentSessionsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Session'
additionalProperties: false
required:
- data
title: ListAgentSessionsResponse
ListAgentsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Agent'
additionalProperties: false
required:
- data
title: ListAgentsResponse
BucketResponse:
type: object
properties:
@@ -6287,16 +6405,11 @@ components:
     type: object
     properties:
       tool_responses:
-        oneOf:
-          - type: array
-            items:
-              $ref: '#/components/schemas/ToolResponse'
-          - type: array
-            items:
-              $ref: '#/components/schemas/ToolResponseMessage'
+        type: array
+        items:
+          $ref: '#/components/schemas/ToolResponse'
         description: >-
-          The tool call responses to resume the turn with. NOTE: ToolResponseMessage
-          will be deprecated. Use ToolResponse.
+          The tool call responses to resume the turn with.
       stream:
         type: boolean
         description: Whether to stream the response.
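The same change appears here in YAML form: resume-turn payloads now take a single shape. A sketch of a conforming request body; the `ToolResponse` fields used (`call_id`, `tool_name`, `content`) are assumptions not spelled out in this diff:

```python
# Request body for resuming a turn under the updated schema: tool_responses
# is now a plain array of ToolResponse objects (no ToolResponseMessage variant).
resume_body = {
    "tool_responses": [
        {
            "call_id": "call-123",       # id of the tool call being answered (assumed field)
            "tool_name": "get_weather",  # hypothetical tool name (assumed field)
            "content": "72F and sunny",  # tool output fed back into the turn (assumed field)
        }
    ],
    "stream": False,  # the boolean documented alongside tool_responses
}
```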


@@ -1267,7 +1267,6 @@
     }
    ],
    "source": [
-    "# NBVAL_SKIP\n",
     "from pydantic import BaseModel\n",
     "\n",
     "\n",
@@ -1279,7 +1278,7 @@
     "\n",
     "user_input = \"Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003. Extract this information into JSON for me. \"\n",
     "response = client.inference.completion(\n",
-    "    model_id=model_id,\n",
+    "    model_id=\"meta-llama/Llama-3.1-8B-Instruct\",\n",
     "    content=user_input,\n",
     "    stream=False,\n",
     "    sampling_params={\n",
@@ -1640,7 +1639,7 @@
     "agent = Agent(\n",
     "    client, \n",
     "    model=model_id,\n",
-    "    instructions=\"You are a helpful assistant\",\n",
+    "    instructions=\"You are a helpful assistant. Use websearch tool to help answer questions.\",\n",
     "    tools=[\"builtin::websearch\"],\n",
     ")\n",
     "user_prompts = [\n",


@@ -1,9 +1 @@
 The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/distribution/server/endpoints.py` using the `generate.py` utility.
-Please install the following packages before running the script:
-```
-pip install fire PyYAML
-```
-Then simply run `sh run_openapi_generator.sh`


@@ -23,9 +23,12 @@ In this example, we will show you how to:
 ##### Building a Search Agent
 ```python
 from llama_stack_client import LlamaStackClient
+from llama_stack_client.lib.agents.agent import Agent
+from llama_stack_client.lib.agents.event_logger import EventLogger
+client = LlamaStackClient(base_url=f"http://{HOST}:{PORT}")
 agent = Agent(
     client,
     model="meta-llama/Llama-3.3-70B-Instruct",
@@ -33,7 +36,7 @@ agent = Agent(
     tools=["builtin::websearch"],
 )
 user_prompts = [
-    "Which teams played in the NBA western conference finals of 2024. Search the web for the answer.",
+    "Which teams played in the NBA Western Conference Finals of 2024. Search the web for the answer.",
     "In which episode and season of South Park does Bill Cosby (BSM-471) first appear? Give me the number and title. Search the web for the answer.",
     "What is the British-American kickboxer Andrew Tate's kickboxing name? Search the web for the answer.",
 ]
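To complete the example, one common way to drive these prompts with the helpers imported above is to open a session and stream each turn through `EventLogger`. A sketch assuming the standard `llama_stack_client` agent methods:

```python
session_id = agent.create_session("search-session")  # session name is arbitrary

for prompt in user_prompts:
    # create_turn streams events by default; EventLogger pretty-prints them.
    response = agent.create_turn(
        messages=[{"role": "user", "content": prompt}],
        session_id=session_id,
    )
    for log in EventLogger().log(response):
        log.print()
```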


@@ -33,6 +33,8 @@ Can be set to any of the following log levels:
 The default global log level is `info`. `all` sets the log level for all components.
+A user can also set `LLAMA_STACK_LOG_FILE` which will pipe the logs to the specified path as well as to the terminal. An example would be: `export LLAMA_STACK_LOG_FILE=server.log`
 ### Llama Stack Build
 In order to build your own distribution, we recommend you clone the `llama-stack` repository.
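A small sketch of wiring these knobs into a server launch from Python. `LLAMA_STACK_LOG_FILE` and its example value come straight from the text above; the `LLAMA_STACK_LOGGING` variable and its `component=level` syntax are assumptions about the exact interface, as is the run-config path:

```python
import os
import subprocess

env = os.environ.copy()
env["LLAMA_STACK_LOGGING"] = "all=debug"    # assumed "component=level" syntax
env["LLAMA_STACK_LOG_FILE"] = "server.log"  # mirror logs to this file and the terminal

# Launch a distribution with the logging configuration applied (assumed config path).
subprocess.run(["llama", "stack", "run", "./run.yaml"], env=env, check=True)
```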


@@ -40,7 +40,6 @@ The following models are available by default:
- `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
- `accounts/fireworks/models/llama-v3p1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
- `accounts/fireworks/models/llama-v3p1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
- `accounts/fireworks/models/llama-v3p2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
- `accounts/fireworks/models/llama-v3p2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `accounts/fireworks/models/llama-v3p2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
- `accounts/fireworks/models/llama-v3p2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`


@@ -23,7 +23,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` |
-| vector_io | `inline::sqlite-vec`, `remote::chromadb`, `remote::pgvector` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.
@@ -130,7 +130,7 @@ llama stack run ./run-with-safety.yaml \
 ### (Optional) Update Model Serving Configuration
 ```{note}
-Please check the [model_entries](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
+Please check the [model_entries](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/models.py) for the supported Ollama models.
 ```
 To serve a new model with `ollama`
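The line above is truncated, but the flow it introduces is: serve the model with `ollama`, then make the stack aware of it. A sketch assuming the `llama_stack_client` model-registration call; the exact signature should be checked against the client docs:

```python
# First, in a separate terminal:
#   ollama run llama3.2:3b-instruct-fp16 --keepalive -1m
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed address

# Register the served model so stack inference can route to it (assumed signature).
client.models.register(
    model_id="meta-llama/Llama-3.2-3B-Instruct",    # alias the stack will expose
    provider_model_id="llama3.2:3b-instruct-fp16",  # name as ollama serves it
    provider_id="ollama",
)
```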


@@ -1,6 +1,6 @@
 # llama (server-side) CLI Reference
-The `llama` CLI tool helps you setup and use the Llama Stack. It should be available on your path after installing the `llama-stack` package.
+The `llama` CLI tool helps you set up and use the Llama Stack. The CLI is available on your path after installing the `llama-stack` package.
 ## Installation
@@ -27,9 +27,9 @@ You have two ways to install Llama Stack:
 ## `llama` subcommands
-1. `download`: `llama` cli tools supports downloading the model from Meta or Hugging Face.
-2. `model`: Lists available models and their properties.
-3. `stack`: Allows you to build and run a Llama Stack server. You can read more about this [here](../../distributions/building_distro).
+1. `download`: Supports downloading models from Meta or Hugging Face. [Downloading models](#downloading-models)
+2. `model`: Lists available models and their properties. [Understanding models](#understand-the-models)
+3. `stack`: Allows you to build a stack using the `llama stack` distribution and run a Llama Stack server. You can read more about how to build a Llama Stack distribution in the [Build your own Distribution](../../distributions/building_distro) documentation.
 ### Sample Usage
@@ -117,7 +117,7 @@ You should see a table like this:
 +----------------------------------+------------------------------------------+----------------+
 ```
-To download models, you can use the llama download command.
+To download models, you can use the `llama download` command.
 ### Downloading from [Meta](https://llama.meta.com/llama-downloads/)
@@ -191,7 +191,7 @@ You should see a table like this:
 The `llama model` command helps you explore the models interface.
 1. `download`: Download the model from different sources. (meta, huggingface)
-2. `list`: Lists all the models available for download with hardware requirements to deploy the models.
+2. `list`: Lists all the models available for download with hardware requirements for deploying the models.
 3. `prompt-format`: Show llama model message formats.
 4. `describe`: Describes all the properties of the model.
@@ -262,13 +262,12 @@ llama model prompt-format -m Llama3.2-3B-Instruct
 ![alt text](../../../resources/prompt-format.png)
 You will be shown a Markdown formatted description of the model interface and how prompts / messages are formatted for various scenarios.
 **NOTE**: Outputs in terminal are color printed to show special tokens.
 ### Remove model
-You can run `llama model remove` to remove unecessary model:
+You can run `llama model remove` to remove an unnecessary model:
 ```
 llama model remove -m Llama-Guard-3-8B-int8

@@ -40,7 +40,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
 ollama run llama3.2:3b-instruct-fp16 --keepalive -1m
 ```
 **Note**:
-- The supported models for llama stack for now is listed in [here](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L43)
+- The supported models for llama stack for now is listed in [here](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/models.py)
 - `keepalive -1m` is used so that ollama continues to keep the model in memory indefinitely. Otherwise, ollama frees up memory and you would have to run `ollama run` again.
---