mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-20 03:40:05 +00:00
Merge branch 'main' into feat/add-url-to-paginated-response
This commit is contained in:
commit
b5047db685
24 changed files with 911 additions and 856 deletions
102
docs/_static/llama-stack-spec.html
vendored
102
docs/_static/llama-stack-spec.html
vendored
|
@ -3318,7 +3318,7 @@
|
|||
"name": "limit",
|
||||
"in": "query",
|
||||
"description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.",
|
||||
"required": true,
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "integer"
|
||||
}
|
||||
|
@ -3327,7 +3327,7 @@
|
|||
"name": "order",
|
||||
"in": "query",
|
||||
"description": "Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.",
|
||||
"required": true,
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
|
@ -3864,7 +3864,7 @@
|
|||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/VectorStoreSearchResponse"
|
||||
"$ref": "#/components/schemas/VectorStoreSearchResponsePage"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -12587,6 +12587,9 @@
|
|||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"name"
|
||||
],
|
||||
"title": "OpenaiCreateVectorStoreRequest"
|
||||
},
|
||||
"VectorStoreObject": {
|
||||
|
@ -13129,13 +13132,74 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"query",
|
||||
"max_num_results",
|
||||
"rewrite_query"
|
||||
"query"
|
||||
],
|
||||
"title": "OpenaiSearchVectorStoreRequest"
|
||||
},
|
||||
"VectorStoreContent": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "text"
|
||||
},
|
||||
"text": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"text"
|
||||
],
|
||||
"title": "VectorStoreContent"
|
||||
},
|
||||
"VectorStoreSearchResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"file_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"filename": {
|
||||
"type": "string"
|
||||
},
|
||||
"score": {
|
||||
"type": "number"
|
||||
},
|
||||
"attributes": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"content": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/VectorStoreContent"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"file_id",
|
||||
"filename",
|
||||
"score",
|
||||
"content"
|
||||
],
|
||||
"title": "VectorStoreSearchResponse",
|
||||
"description": "Response from searching a vector store."
|
||||
},
|
||||
"VectorStoreSearchResponsePage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"object": {
|
||||
|
@ -13148,29 +13212,7 @@
|
|||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
"$ref": "#/components/schemas/VectorStoreSearchResponse"
|
||||
}
|
||||
},
|
||||
"has_more": {
|
||||
|
@ -13188,7 +13230,7 @@
|
|||
"data",
|
||||
"has_more"
|
||||
],
|
||||
"title": "VectorStoreSearchResponse",
|
||||
"title": "VectorStoreSearchResponsePage",
|
||||
"description": "Response from searching a vector store."
|
||||
},
|
||||
"OpenaiUpdateVectorStoreRequest": {
|
||||
|
|
63
docs/_static/llama-stack-spec.yaml
vendored
63
docs/_static/llama-stack-spec.yaml
vendored
|
@ -2323,7 +2323,7 @@ paths:
|
|||
description: >-
|
||||
A limit on the number of objects to be returned. Limit can range between
|
||||
1 and 100, and the default is 20.
|
||||
required: true
|
||||
required: false
|
||||
schema:
|
||||
type: integer
|
||||
- name: order
|
||||
|
@ -2331,7 +2331,7 @@ paths:
|
|||
description: >-
|
||||
Sort order by the `created_at` timestamp of the objects. `asc` for ascending
|
||||
order and `desc` for descending order.
|
||||
required: true
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: after
|
||||
|
@ -2734,7 +2734,7 @@ paths:
|
|||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/VectorStoreSearchResponse'
|
||||
$ref: '#/components/schemas/VectorStoreSearchResponsePage'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
|
@ -8794,6 +8794,8 @@ components:
|
|||
description: >-
|
||||
The provider-specific vector database ID.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
title: OpenaiCreateVectorStoreRequest
|
||||
VectorStoreObject:
|
||||
type: object
|
||||
|
@ -9190,10 +9192,49 @@ components:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- query
|
||||
- max_num_results
|
||||
- rewrite_query
|
||||
title: OpenaiSearchVectorStoreRequest
|
||||
VectorStoreContent:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: text
|
||||
text:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- text
|
||||
title: VectorStoreContent
|
||||
VectorStoreSearchResponse:
|
||||
type: object
|
||||
properties:
|
||||
file_id:
|
||||
type: string
|
||||
filename:
|
||||
type: string
|
||||
score:
|
||||
type: number
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: number
|
||||
- type: boolean
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/VectorStoreContent'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- file_id
|
||||
- filename
|
||||
- score
|
||||
- content
|
||||
title: VectorStoreSearchResponse
|
||||
description: Response from searching a vector store.
|
||||
VectorStoreSearchResponsePage:
|
||||
type: object
|
||||
properties:
|
||||
object:
|
||||
|
@ -9204,15 +9245,7 @@ components:
|
|||
data:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
$ref: '#/components/schemas/VectorStoreSearchResponse'
|
||||
has_more:
|
||||
type: boolean
|
||||
default: false
|
||||
|
@ -9224,7 +9257,7 @@ components:
|
|||
- search_query
|
||||
- data
|
||||
- has_more
|
||||
title: VectorStoreSearchResponse
|
||||
title: VectorStoreSearchResponsePage
|
||||
description: Response from searching a vector store.
|
||||
OpenaiUpdateVectorStoreRequest:
|
||||
type: object
|
||||
|
|
|
@ -56,10 +56,10 @@ shields: []
|
|||
server:
|
||||
port: 8321
|
||||
auth:
|
||||
provider_type: "kubernetes"
|
||||
provider_type: "oauth2_token"
|
||||
config:
|
||||
api_server_url: "https://kubernetes.default.svc"
|
||||
ca_cert_path: "/path/to/ca.crt"
|
||||
jwks:
|
||||
uri: "https://my-token-issuing-svc.com/jwks"
|
||||
```
|
||||
|
||||
Let's break this down into the different sections. The first section specifies the set of APIs that the stack server will serve:
|
||||
|
@ -132,16 +132,52 @@ The server supports multiple authentication providers:
|
|||
|
||||
#### OAuth 2.0/OpenID Connect Provider with Kubernetes
|
||||
|
||||
The Kubernetes cluster must be configured to use a service account for authentication.
|
||||
The server can be configured to use service account tokens for authorization, validating these against the Kubernetes API server, e.g.:
|
||||
```yaml
|
||||
server:
|
||||
auth:
|
||||
provider_type: "oauth2_token"
|
||||
config:
|
||||
jwks:
|
||||
uri: "https://kubernetes.default.svc:8443/openid/v1/jwks"
|
||||
token: "${env.TOKEN:}"
|
||||
key_recheck_period: 3600
|
||||
tls_cafile: "/path/to/ca.crt"
|
||||
issuer: "https://kubernetes.default.svc"
|
||||
audience: "https://kubernetes.default.svc"
|
||||
```
|
||||
|
||||
To find your cluster's jwks uri (from which the public key(s) to verify the token signature are obtained), run:
|
||||
```
|
||||
kubectl get --raw /.well-known/openid-configuration| jq -r .jwks_uri
|
||||
```
|
||||
|
||||
For the tls_cafile, you can use the CA certificate of the OIDC provider:
|
||||
```bash
|
||||
kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}'
|
||||
```
|
||||
|
||||
For the issuer, you can use the OIDC provider's URL:
|
||||
```bash
|
||||
kubectl get --raw /.well-known/openid-configuration| jq .issuer
|
||||
```
|
||||
|
||||
The audience can be obtained from a token, e.g. run:
|
||||
```bash
|
||||
kubectl create token default --duration=1h | cut -d. -f2 | base64 -d | jq .aud
|
||||
```
|
||||
|
||||
The jwks token is used to authorize access to the jwks endpoint. You can obtain a token by running:
|
||||
|
||||
```bash
|
||||
kubectl create namespace llama-stack
|
||||
kubectl create serviceaccount llama-stack-auth -n llama-stack
|
||||
kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --serviceaccount=llama-stack:llama-stack-auth -n llama-stack
|
||||
kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token
|
||||
export TOKEN=$(cat llama-stack-auth-token)
|
||||
```
|
||||
|
||||
Make sure the `kube-apiserver` runs with `--anonymous-auth=true` to allow unauthenticated requests
|
||||
Alternatively, you can configure the jwks endpoint to allow anonymous access. To do this, make sure
|
||||
the `kube-apiserver` runs with `--anonymous-auth=true` to allow unauthenticated requests
|
||||
and that the correct RoleBinding is created to allow the service account to access the necessary
|
||||
resources. If that is not the case, you can create a RoleBinding for the service account to access
|
||||
the necessary resources:
|
||||
|
@ -175,35 +211,6 @@ And then apply the configuration:
|
|||
kubectl apply -f allow-anonymous-openid.yaml
|
||||
```
|
||||
|
||||
Validates tokens against the Kubernetes API server through the OIDC provider:
|
||||
```yaml
|
||||
server:
|
||||
auth:
|
||||
provider_type: "oauth2_token"
|
||||
config:
|
||||
jwks:
|
||||
uri: "https://kubernetes.default.svc"
|
||||
key_recheck_period: 3600
|
||||
tls_cafile: "/path/to/ca.crt"
|
||||
issuer: "https://kubernetes.default.svc"
|
||||
audience: "https://kubernetes.default.svc"
|
||||
```
|
||||
|
||||
To find your cluster's audience, run:
|
||||
```bash
|
||||
kubectl create token default --duration=1h | cut -d. -f2 | base64 -d | jq .aud
|
||||
```
|
||||
|
||||
For the issuer, you can use the OIDC provider's URL:
|
||||
```bash
|
||||
kubectl get --raw /.well-known/openid-configuration| jq .issuer
|
||||
```
|
||||
|
||||
For the tls_cafile, you can use the CA certificate of the OIDC provider:
|
||||
```bash
|
||||
kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}'
|
||||
```
|
||||
|
||||
The provider extracts user information from the JWT token:
|
||||
- Username from the `sub` claim becomes a role
|
||||
- Kubernetes groups become teams
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue