mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-07 04:45:44 +00:00
Merge branch 'main' into langchain_llamastack
This commit is contained in:
commit
c2efb5556f
127 changed files with 5090 additions and 504 deletions
132
docs/_static/llama-stack-spec.html
vendored
132
docs/_static/llama-stack-spec.html
vendored
|
@ -4605,6 +4605,49 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/inference/rerank": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "RerankResponse with indices sorted by relevance score (descending).",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/RerankResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"description": "Rerank a list of documents based on their relevance to a query.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/RerankRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
@ -16587,6 +16630,95 @@
|
|||
],
|
||||
"title": "RegisterVectorDbRequest"
|
||||
},
|
||||
"RerankRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The identifier of the reranking model to use."
|
||||
},
|
||||
"query": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
|
||||
}
|
||||
],
|
||||
"description": "The search query to rank items against. Can be a string, text content part, or image content part. The input must not exceed the model's max input token length."
|
||||
},
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "List of items to rerank. Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length."
|
||||
},
|
||||
"max_num_results": {
|
||||
"type": "integer",
|
||||
"description": "(Optional) Maximum number of results to return. Default: returns all."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"model",
|
||||
"query",
|
||||
"items"
|
||||
],
|
||||
"title": "RerankRequest"
|
||||
},
|
||||
"RerankData": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "The original index of the document in the input list"
|
||||
},
|
||||
"relevance_score": {
|
||||
"type": "number",
|
||||
"description": "The relevance score from the model output. Values are inverted when applicable so that higher scores indicate greater relevance."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"index",
|
||||
"relevance_score"
|
||||
],
|
||||
"title": "RerankData",
|
||||
"description": "A single rerank result from a reranking response."
|
||||
},
|
||||
"RerankResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/RerankData"
|
||||
},
|
||||
"description": "List of rerank result objects, sorted by relevance score (descending)"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"data"
|
||||
],
|
||||
"title": "RerankResponse",
|
||||
"description": "Response from a reranking request."
|
||||
},
|
||||
"ResumeAgentTurnRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
101
docs/_static/llama-stack-spec.yaml
vendored
101
docs/_static/llama-stack-spec.yaml
vendored
|
@ -3264,6 +3264,37 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/QueryTracesRequest'
|
||||
required: true
|
||||
/v1/inference/rerank:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
RerankResponse with indices sorted by relevance score (descending).
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RerankResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
description: >-
|
||||
Rerank a list of documents based on their relevance to a query.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RerankRequest'
|
||||
required: true
|
||||
/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume:
|
||||
post:
|
||||
responses:
|
||||
|
@ -12337,6 +12368,76 @@ components:
|
|||
- vector_db_id
|
||||
- embedding_model
|
||||
title: RegisterVectorDbRequest
|
||||
RerankRequest:
|
||||
type: object
|
||||
properties:
|
||||
model:
|
||||
type: string
|
||||
description: >-
|
||||
The identifier of the reranking model to use.
|
||||
query:
|
||||
oneOf:
|
||||
- type: string
|
||||
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
|
||||
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
|
||||
description: >-
|
||||
The search query to rank items against. Can be a string, text content
|
||||
part, or image content part. The input must not exceed the model's max
|
||||
input token length.
|
||||
items:
|
||||
type: array
|
||||
items:
|
||||
oneOf:
|
||||
- type: string
|
||||
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
|
||||
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
|
||||
description: >-
|
||||
List of items to rerank. Each item can be a string, text content part,
|
||||
or image content part. Each input must not exceed the model's max input
|
||||
token length.
|
||||
max_num_results:
|
||||
type: integer
|
||||
description: >-
|
||||
(Optional) Maximum number of results to return. Default: returns all.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model
|
||||
- query
|
||||
- items
|
||||
title: RerankRequest
|
||||
RerankData:
|
||||
type: object
|
||||
properties:
|
||||
index:
|
||||
type: integer
|
||||
description: >-
|
||||
The original index of the document in the input list
|
||||
relevance_score:
|
||||
type: number
|
||||
description: >-
|
||||
The relevance score from the model output. Values are inverted when applicable
|
||||
so that higher scores indicate greater relevance.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- index
|
||||
- relevance_score
|
||||
title: RerankData
|
||||
description: >-
|
||||
A single rerank result from a reranking response.
|
||||
RerankResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/RerankData'
|
||||
description: >-
|
||||
List of rerank result objects, sorted by relevance score (descending)
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: RerankResponse
|
||||
description: Response from a reranking request.
|
||||
ResumeAgentTurnRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
|
|
@ -225,8 +225,32 @@ server:
|
|||
port: 8321 # Port to listen on (default: 8321)
|
||||
tls_certfile: "/path/to/cert.pem" # Optional: Path to TLS certificate for HTTPS
|
||||
tls_keyfile: "/path/to/key.pem" # Optional: Path to TLS key for HTTPS
|
||||
cors: true # Optional: Enable CORS (dev mode) or full config object
|
||||
```
|
||||
|
||||
### CORS Configuration
|
||||
|
||||
CORS (Cross-Origin Resource Sharing) can be configured in two ways:
|
||||
|
||||
**Local development** (allows localhost origins only):
|
||||
```yaml
|
||||
server:
|
||||
cors: true
|
||||
```
|
||||
|
||||
**Explicit configuration** (custom origins and settings):
|
||||
```yaml
|
||||
server:
|
||||
cors:
|
||||
allow_origins: ["https://myapp.com", "https://app.example.com"]
|
||||
allow_methods: ["GET", "POST", "PUT", "DELETE"]
|
||||
allow_headers: ["Content-Type", "Authorization"]
|
||||
allow_credentials: true
|
||||
max_age: 3600
|
||||
```
|
||||
|
||||
When `cors: true` is set, the server enables secure localhost-only access for local development. For production, specify exact origins to maintain security.
|
||||
|
||||
### Authentication Configuration
|
||||
|
||||
> **Breaking Change (v0.2.14)**: The authentication configuration structure has changed. The previous format with `provider_type` and `config` fields has been replaced with a unified `provider_config` field that includes the `type` field. Update your configuration files accordingly.
|
||||
|
@ -618,6 +642,54 @@ Content-Type: application/json
|
|||
}
|
||||
```
|
||||
|
||||
### CORS Configuration
|
||||
|
||||
Configure CORS to allow web browsers to make requests from different domains. Disabled by default.
|
||||
|
||||
#### Quick Setup
|
||||
|
||||
For development, use the simple boolean flag:
|
||||
|
||||
```yaml
|
||||
server:
|
||||
cors: true # Auto-enables localhost with any port
|
||||
```
|
||||
|
||||
This automatically allows `http://localhost:*` and `https://localhost:*` with secure defaults.
|
||||
|
||||
#### Custom Configuration
|
||||
|
||||
For specific origins and full control:
|
||||
|
||||
```yaml
|
||||
server:
|
||||
cors:
|
||||
allow_origins: ["https://myapp.com", "https://staging.myapp.com"]
|
||||
allow_credentials: true
|
||||
allow_methods: ["GET", "POST", "PUT", "DELETE"]
|
||||
allow_headers: ["Content-Type", "Authorization"]
|
||||
allow_origin_regex: "https://.*\\.example\\.com" # Optional regex pattern
|
||||
expose_headers: ["X-Total-Count"]
|
||||
max_age: 86400
|
||||
```
|
||||
|
||||
#### Configuration Options
|
||||
|
||||
| Field | Description | Default |
|
||||
| -------------------- | ---------------------------------------------- | ------- |
|
||||
| `allow_origins` | List of allowed origins. Use `["*"]` for any. | `["*"]` |
|
||||
| `allow_origin_regex` | Regex pattern for allowed origins (optional). | `None` |
|
||||
| `allow_methods` | Allowed HTTP methods. | `["*"]` |
|
||||
| `allow_headers` | Allowed headers. | `["*"]` |
|
||||
| `allow_credentials` | Allow credentials (cookies, auth headers). | `false` |
|
||||
| `expose_headers` | Headers exposed to browser. | `[]` |
|
||||
| `max_age` | Preflight cache time (seconds). | `600` |
|
||||
|
||||
**Security Notes**:
|
||||
- `allow_credentials: true` requires explicit origins (no wildcards)
|
||||
- `cors: true` enables localhost access only (secure for development)
|
||||
- For public APIs, always specify exact allowed origins
|
||||
|
||||
## Extending to handle Safety
|
||||
|
||||
Configuring Safety can be a little involved so it is instructive to go through an example.
|
||||
|
|
|
@ -17,7 +17,6 @@ client = LlamaStackAsLibraryClient(
|
|||
# provider_data is optional, but if you need to pass in any provider specific data, you can do so here.
|
||||
provider_data={"tavily_search_api_key": os.environ["TAVILY_SEARCH_API_KEY"]},
|
||||
)
|
||||
client.initialize()
|
||||
```
|
||||
|
||||
This will parse your config and set up any inline implementations and remote clients that it requires.
|
||||
|
@ -32,5 +31,4 @@ If you've created a [custom distribution](https://llama-stack.readthedocs.io/en/
|
|||
|
||||
```python
|
||||
client = LlamaStackAsLibraryClient(config_path)
|
||||
client.initialize()
|
||||
```
|
||||
|
|
|
@ -10,4 +10,5 @@ This section contains documentation for all available providers for the **files*
|
|||
:maxdepth: 1
|
||||
|
||||
inline_localfs
|
||||
remote_s3
|
||||
```
|
||||
|
|
33
docs/source/providers/files/remote_s3.md
Normal file
33
docs/source/providers/files/remote_s3.md
Normal file
|
@ -0,0 +1,33 @@
|
|||
# remote::s3
|
||||
|
||||
## Description
|
||||
|
||||
AWS S3-based file storage provider for scalable cloud file management with metadata persistence.
|
||||
|
||||
## Configuration
|
||||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `bucket_name` | `str` | No | | S3 bucket name to store files |
|
||||
| `region` | `str` | No | us-east-1 | AWS region where the bucket is located |
|
||||
| `aws_access_key_id` | `str \| None` | No | | AWS access key ID (optional if using IAM roles) |
|
||||
| `aws_secret_access_key` | `str \| None` | No | | AWS secret access key (optional if using IAM roles) |
|
||||
| `endpoint_url` | `str \| None` | No | | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) |
|
||||
| `auto_create_bucket` | `bool` | No | False | Automatically create the S3 bucket if it doesn't exist |
|
||||
| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata |
|
||||
|
||||
## Sample Configuration
|
||||
|
||||
```yaml
|
||||
bucket_name: ${env.S3_BUCKET_NAME}
|
||||
region: ${env.AWS_REGION:=us-east-1}
|
||||
aws_access_key_id: ${env.AWS_ACCESS_KEY_ID:=}
|
||||
aws_secret_access_key: ${env.AWS_SECRET_ACCESS_KEY:=}
|
||||
endpoint_url: ${env.S3_ENDPOINT_URL:=}
|
||||
auto_create_bucket: ${env.S3_AUTO_CREATE_BUCKET:=false}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/s3_files_metadata.db
|
||||
|
||||
```
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue