Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-20 11:47:00 +00:00)

Merge branch 'main' into nvidia-e2e-notebook

Commit 012dd6891f: 96 changed files with 4675 additions and 426 deletions
136  .github/workflows/integration-auth-tests.yml  (vendored, new file)
@@ -0,0 +1,136 @@
name: Integration Auth Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'distributions/**'
      - 'llama_stack/**'
      - 'tests/integration/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/integration-auth-tests.yml' # This workflow

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-matrix:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        auth-provider: [kubernetes]
      fail-fast: false # we want to run all tests regardless of failure

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          python-version: "3.10"

      - name: Set Up Environment and Install Dependencies
        run: |
          uv sync --extra dev --extra test
          uv pip install -e .
          llama stack build --template ollama --image-type venv

      - name: Install minikube
        if: ${{ matrix.auth-provider == 'kubernetes' }}
        uses: medyagh/setup-minikube@latest

      - name: Start minikube
        if: ${{ matrix.auth-provider == 'kubernetes' }}
        run: |
          minikube start
          kubectl get pods -A

      - name: Configure Kube Auth
        if: ${{ matrix.auth-provider == 'kubernetes' }}
        run: |
          kubectl create namespace llama-stack
          kubectl create serviceaccount llama-stack-auth -n llama-stack
          kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --serviceaccount=llama-stack:llama-stack-auth -n llama-stack
          kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token

      - name: Set Kubernetes Config
        if: ${{ matrix.auth-provider == 'kubernetes' }}
        run: |
          echo "KUBERNETES_API_SERVER_URL=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}')" >> $GITHUB_ENV
          echo "KUBERNETES_CA_CERT_PATH=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}')" >> $GITHUB_ENV

      - name: Set Kube Auth Config and run server
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        if: ${{ matrix.auth-provider == 'kubernetes' }}
        run: |
          run_dir=$(mktemp -d)
          cat <<'EOF' > $run_dir/run.yaml
          version: '2'
          image_name: kube
          apis:
          - agents
          - datasetio
          - eval
          - inference
          - safety
          - scoring
          - telemetry
          - tool_runtime
          - vector_io
          providers:
            agents:
            - provider_id: meta-reference
              provider_type: inline::meta-reference
              config:
                persistence_store:
                  type: sqlite
                  namespace: null
                  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
            telemetry:
            - provider_id: meta-reference
              provider_type: inline::meta-reference
              config:
                service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
                sinks: ${env.TELEMETRY_SINKS:console,sqlite}
                sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ollama/trace_store.db}
          server:
            port: 8321
          EOF
          yq eval '.server.auth = {"provider_type": "${{ matrix.auth-provider }}"}' -i $run_dir/run.yaml
          yq eval '.server.auth.config = {"api_server_url": "${{ env.KUBERNETES_API_SERVER_URL }}", "ca_cert_path": "${{ env.KUBERNETES_CA_CERT_PATH }}"}' -i $run_dir/run.yaml
          cat $run_dir/run.yaml

          source .venv/bin/activate
          nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
          echo "Waiting for Llama Stack server..."
          for i in {1..30}; do
            if curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://localhost:8321/v1/health | grep -q "OK"; then
              echo "Llama Stack server is up!"
              if grep -q "Enabling authentication with provider: ${{ matrix.auth-provider }}" server.log; then
                echo "Llama Stack server is configured to use ${{ matrix.auth-provider }} auth"
                exit 0
              else
                echo "Llama Stack server is not configured to use ${{ matrix.auth-provider }} auth"
                cat server.log
                exit 1
              fi
            fi
            sleep 1
          done
          echo "Llama Stack server failed to start"
          cat server.log
          exit 1

      - name: Test auth
        run: |
          curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://127.0.0.1:8321/v1/providers|jq
24  .github/workflows/integration-tests.yml  (vendored)
@@ -33,19 +33,24 @@ jobs:
       uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

     - name: Install uv
-      uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 # v5.4.1
+      uses: astral-sh/setup-uv@c7f87aa956e4c323abf06d5dec078e358f6b4d04 # v6.0.0
       with:
         python-version: "3.10"
+        activate-environment: true

     - name: Install and start Ollama
       run: |
         # the ollama installer also starts the ollama service
         curl -fsSL https://ollama.com/install.sh | sh

-    - name: Pull Ollama image
+    # Do NOT cache models - pulling the cache is actually slower than just pulling the model.
+    # It takes ~45 seconds to pull the models from the cache and unpack it, but only 30 seconds to
+    # pull them directly.
+    # Maybe this is because the cache is being pulled at the same time by all the matrix jobs?
+    - name: Pull Ollama models (instruct and embed)
       run: |
+        # TODO: cache the model. OLLAMA_MODELS defaults to ~ollama/.ollama/models.
         ollama pull llama3.2:3b-instruct-fp16
+        ollama pull all-minilm:latest

     - name: Set Up Environment and Install Dependencies
       run: |

@@ -105,3 +110,16 @@ jobs:
         -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
         --text-model="meta-llama/Llama-3.2-3B-Instruct" \
         --embedding-model=all-MiniLM-L6-v2
+
+    - name: Write ollama logs to file
+      run: |
+        sudo journalctl -u ollama.service > ollama.log
+
+    - name: Upload all logs to artifacts
+      if: always()
+      uses: actions/upload-artifact@v4
+      with:
+        name: logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.client-type }}-${{ matrix.test-type }}
+        path: |
+          *.log
+        retention-days: 1
59  .github/workflows/providers-build.yml  (vendored)
@@ -56,7 +56,7 @@ jobs:
         python-version: '3.10'

     - name: Install uv
-      uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 # v5.4.1
+      uses: astral-sh/setup-uv@c7f87aa956e4c323abf06d5dec078e358f6b4d04 # v6.0.0
       with:
         python-version: "3.10"

@@ -94,7 +94,7 @@ jobs:
         python-version: '3.10'

     - name: Install uv
-      uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 # v5.4.1
+      uses: astral-sh/setup-uv@c7f87aa956e4c323abf06d5dec078e358f6b4d04 # v6.0.0
       with:
         python-version: "3.10"

@@ -120,7 +120,7 @@ jobs:
         python-version: '3.10'

     - name: Install uv
-      uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 # v5.4.1
+      uses: astral-sh/setup-uv@c7f87aa956e4c323abf06d5dec078e358f6b4d04 # v6.0.0
       with:
         python-version: "3.10"

@@ -145,3 +145,56 @@ jobs:
           echo "Entrypoint is not correct"
           exit 1
         fi
+
+  build-ubi9-container-distribution:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Set up Python
+        uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0
+        with:
+          python-version: '3.10'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 # v5.4.1
+        with:
+          python-version: "3.10"
+
+      - name: Install LlamaStack
+        run: |
+          uv venv
+          source .venv/bin/activate
+          uv pip install -e .
+
+      - name: Pin template to UBI9 base
+        run: |
+          yq -i '
+            .image_type = "container" |
+            .image_name = "ubi9-test" |
+            .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
+          ' llama_stack/templates/dev/build.yaml
+
+      - name: Build dev container (UBI9)
+        env:
+          USE_COPY_NOT_MOUNT: "true"
+          LLAMA_STACK_DIR: "."
+        run: |
+          uv run llama stack build --config llama_stack/templates/dev/build.yaml
+
+      - name: Inspect UBI9 image
+        run: |
+          IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
+          entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
+          echo "Entrypoint: $entrypoint"
+          if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
+            echo "Entrypoint is not correct"
+            exit 1
+          fi
+
+          echo "Checking /etc/os-release in $IMAGE_ID"
+          docker run --rm --entrypoint sh "$IMAGE_ID" -c \
+            'source /etc/os-release && echo "$ID"' \
+            | grep -qE '^(rhel|ubi)$' \
+            || { echo "Base image is not UBI 9!"; exit 1; }
53  .github/workflows/test-external-providers.yml  (vendored)
@@ -26,22 +26,10 @@ jobs:
       uses: actions/checkout@v4

     - name: Install uv
-      uses: astral-sh/setup-uv@v5
+      uses: astral-sh/setup-uv@v6
       with:
         python-version: "3.10"

-    - name: Install Ollama
-      run: |
-        curl -fsSL https://ollama.com/install.sh | sh
-
-    - name: Pull Ollama image
-      run: |
-        ollama pull llama3.2:3b-instruct-fp16
-
-    - name: Start Ollama in background
-      run: |
-        nohup ollama run llama3.2:3b-instruct-fp16 --keepalive=30m > ollama.log 2>&1 &
-
     - name: Set Up Environment and Install Dependencies
       run: |
         uv sync --extra dev --extra test

@@ -66,21 +54,6 @@ jobs:
       run: |
         USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml

-    - name: Wait for Ollama to start
-      run: |
-        echo "Waiting for Ollama..."
-        for i in {1..30}; do
-          if curl -s http://localhost:11434 | grep -q "Ollama is running"; then
-            echo "Ollama is running!"
-            exit 0
-          fi
-          sleep 1
-        done
-        echo "Ollama failed to start"
-        ollama ps
-        ollama.log
-        exit 1
-
     - name: Start Llama Stack server in background
       if: ${{ matrix.image-type }} == 'venv'
       env:

@@ -92,24 +65,14 @@ jobs:

     - name: Wait for Llama Stack server to be ready
       run: |
         echo "Waiting for Llama Stack server..."
         for i in {1..30}; do
-          if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
-            echo "Llama Stack server is up!"
-            if grep -q "remote::custom_ollama from /tmp/providers.d/remote/inference/custom_ollama.yaml" server.log; then
-              echo "Llama Stack server is using custom Ollama provider"
-              exit 0
-            else
-              echo "Llama Stack server is not using custom Ollama provider"
-              exit 1
-            fi
-          fi
+          if ! grep -q "remote::custom_ollama from /tmp/providers.d/remote/inference/custom_ollama.yaml" server.log; then
+            echo "Waiting for Llama Stack server to load the provider..."
+            sleep 1
+          else
+            echo "Provider loaded"
+            exit 0
+          fi
         done
         echo "Llama Stack server failed to start"
         cat server.log
         echo "Provider failed to load"
         exit 1

     - name: run inference tests
       run: |
         uv run pytest -v tests/integration/inference/test_text_inference.py --stack-config="http://localhost:8321" --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2
2  .github/workflows/unit-tests.yml  (vendored)
@@ -37,7 +37,7 @@ jobs:
       with:
         python-version: ${{ matrix.python }}

-    - uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 # v5.4.1
+    - uses: astral-sh/setup-uv@c7f87aa956e4c323abf06d5dec078e358f6b4d04 # v6.0.0
       with:
         python-version: ${{ matrix.python }}
         enable-cache: false
2  .github/workflows/update-readthedocs.yml  (vendored)
@@ -41,7 +41,7 @@ jobs:
         python-version: '3.11'

     - name: Install the latest version of uv
-      uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 # v5.4.1
+      uses: astral-sh/setup-uv@c7f87aa956e4c323abf06d5dec078e358f6b4d04 # v6.0.0

     - name: Sync with uv
       run: uv sync --extra docs
512  docs/_static/llama-stack-spec.html  (vendored)
@@ -497,6 +497,54 @@
        }
      }
    },
    "/v1/openai/v1/responses": {
      "post": {
        "responses": {
          "200": {
            "description": "Runtime representation of an annotated type.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/OpenAIResponseObject"
                }
              },
              "text/event-stream": {
                "schema": {
                  "$ref": "#/components/schemas/OpenAIResponseObjectStream"
                }
              }
            }
          },
          "400": {
            "$ref": "#/components/responses/BadRequest400"
          },
          "429": {
            "$ref": "#/components/responses/TooManyRequests429"
          },
          "500": {
            "$ref": "#/components/responses/InternalServerError500"
          },
          "default": {
            "$ref": "#/components/responses/DefaultError"
          }
        },
        "tags": [
          "Agents"
        ],
        "description": "Create a new OpenAI response.",
        "parameters": [],
        "requestBody": {
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/CreateOpenaiResponseRequest"
              }
            }
          },
          "required": true
        }
      }
    },
    "/v1/files": {
      "get": {
        "responses": {

@@ -1278,6 +1326,49 @@
        ]
      }
    },
    "/v1/openai/v1/responses/{id}": {
      "get": {
        "responses": {
          "200": {
            "description": "An OpenAIResponseObject.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/OpenAIResponseObject"
                }
              }
            }
          },
          "400": {
            "$ref": "#/components/responses/BadRequest400"
          },
          "429": {
            "$ref": "#/components/responses/TooManyRequests429"
          },
          "500": {
            "$ref": "#/components/responses/InternalServerError500"
          },
          "default": {
            "$ref": "#/components/responses/DefaultError"
          }
        },
        "tags": [
          "Agents"
        ],
        "description": "Retrieve an OpenAI response by its ID.",
        "parameters": [
          {
            "name": "id",
            "in": "path",
            "description": "The ID of the OpenAI response to retrieve.",
            "required": true,
            "schema": {
              "type": "string"
            }
          }
        ]
      }
    },
    "/v1/scoring-functions/{scoring_fn_id}": {
      "get": {
        "responses": {

@@ -6192,6 +6283,427 @@
      ],
      "title": "AgentTurnResponseTurnStartPayload"
    },
    "OpenAIResponseInputMessage": {
      "type": "object",
      "properties": {
        "content": {
          "oneOf": [
            { "type": "string" },
            {
              "type": "array",
              "items": { "$ref": "#/components/schemas/OpenAIResponseInputMessageContent" }
            }
          ]
        },
        "role": {
          "oneOf": [
            { "type": "string", "const": "system" },
            { "type": "string", "const": "developer" },
            { "type": "string", "const": "user" },
            { "type": "string", "const": "assistant" }
          ]
        },
        "type": { "type": "string", "const": "message", "default": "message" }
      },
      "additionalProperties": false,
      "required": ["content", "role"],
      "title": "OpenAIResponseInputMessage"
    },
    "OpenAIResponseInputMessageContent": {
      "oneOf": [
        { "$ref": "#/components/schemas/OpenAIResponseInputMessageContentText" },
        { "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" }
      ],
      "discriminator": {
        "propertyName": "type",
        "mapping": {
          "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
          "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
        }
      }
    },
    "OpenAIResponseInputMessageContentImage": {
      "type": "object",
      "properties": {
        "detail": {
          "oneOf": [
            { "type": "string", "const": "low" },
            { "type": "string", "const": "high" },
            { "type": "string", "const": "auto" }
          ],
          "default": "auto"
        },
        "type": { "type": "string", "const": "input_image", "default": "input_image" },
        "image_url": { "type": "string" }
      },
      "additionalProperties": false,
      "required": ["detail", "type"],
      "title": "OpenAIResponseInputMessageContentImage"
    },
    "OpenAIResponseInputMessageContentText": {
      "type": "object",
      "properties": {
        "text": { "type": "string" },
        "type": { "type": "string", "const": "input_text", "default": "input_text" }
      },
      "additionalProperties": false,
      "required": ["text", "type"],
      "title": "OpenAIResponseInputMessageContentText"
    },
    "OpenAIResponseInputTool": {
      "type": "object",
      "properties": {
        "type": {
          "oneOf": [
            { "type": "string", "const": "web_search" },
            { "type": "string", "const": "web_search_preview_2025_03_11" }
          ],
          "default": "web_search"
        },
        "search_context_size": { "type": "string", "default": "medium" }
      },
      "additionalProperties": false,
      "required": ["type"],
      "title": "OpenAIResponseInputToolWebSearch"
    },
    "CreateOpenaiResponseRequest": {
      "type": "object",
      "properties": {
        "input": {
          "oneOf": [
            { "type": "string" },
            {
              "type": "array",
              "items": { "$ref": "#/components/schemas/OpenAIResponseInputMessage" }
            }
          ],
          "description": "Input message(s) to create the response."
        },
        "model": {
          "type": "string",
          "description": "The underlying LLM used for completions."
        },
        "previous_response_id": {
          "type": "string",
          "description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses."
        },
        "store": { "type": "boolean" },
        "stream": { "type": "boolean" },
        "tools": {
          "type": "array",
          "items": { "$ref": "#/components/schemas/OpenAIResponseInputTool" }
        }
      },
      "additionalProperties": false,
      "required": ["input", "model"],
      "title": "CreateOpenaiResponseRequest"
    },
    "OpenAIResponseError": {
      "type": "object",
      "properties": {
        "code": { "type": "string" },
        "message": { "type": "string" }
      },
      "additionalProperties": false,
      "required": ["code", "message"],
      "title": "OpenAIResponseError"
    },
    "OpenAIResponseObject": {
      "type": "object",
      "properties": {
        "created_at": { "type": "integer" },
        "error": { "$ref": "#/components/schemas/OpenAIResponseError" },
        "id": { "type": "string" },
        "model": { "type": "string" },
        "object": { "type": "string", "const": "response", "default": "response" },
        "output": {
          "type": "array",
          "items": { "$ref": "#/components/schemas/OpenAIResponseOutput" }
        },
        "parallel_tool_calls": { "type": "boolean", "default": false },
        "previous_response_id": { "type": "string" },
        "status": { "type": "string" },
        "temperature": { "type": "number" },
        "top_p": { "type": "number" },
        "truncation": { "type": "string" },
        "user": { "type": "string" }
      },
      "additionalProperties": false,
      "required": ["created_at", "id", "model", "object", "output", "parallel_tool_calls", "status"],
      "title": "OpenAIResponseObject"
    },
    "OpenAIResponseOutput": {
      "oneOf": [
        { "$ref": "#/components/schemas/OpenAIResponseOutputMessage" },
        { "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" }
      ],
      "discriminator": {
        "propertyName": "type",
        "mapping": {
          "message": "#/components/schemas/OpenAIResponseOutputMessage",
          "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
        }
      }
    },
    "OpenAIResponseOutputMessage": {
      "type": "object",
      "properties": {
        "id": { "type": "string" },
        "content": {
          "type": "array",
          "items": { "$ref": "#/components/schemas/OpenAIResponseOutputMessageContent" }
        },
        "role": { "type": "string", "const": "assistant", "default": "assistant" },
        "status": { "type": "string" },
        "type": { "type": "string", "const": "message", "default": "message" }
      },
      "additionalProperties": false,
      "required": ["id", "content", "role", "status", "type"],
      "title": "OpenAIResponseOutputMessage"
    },
    "OpenAIResponseOutputMessageContent": {
      "type": "object",
      "properties": {
        "text": { "type": "string" },
        "type": { "type": "string", "const": "output_text", "default": "output_text" }
      },
      "additionalProperties": false,
      "required": ["text", "type"],
      "title": "OpenAIResponseOutputMessageContentOutputText"
    },
    "OpenAIResponseOutputMessageWebSearchToolCall": {
      "type": "object",
      "properties": {
        "id": { "type": "string" },
        "status": { "type": "string" },
        "type": { "type": "string", "const": "web_search_call", "default": "web_search_call" }
      },
      "additionalProperties": false,
      "required": ["id", "status", "type"],
      "title": "OpenAIResponseOutputMessageWebSearchToolCall"
    },
    "OpenAIResponseObjectStream": {
      "oneOf": [
        { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated" },
        { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted" }
      ],
      "discriminator": {
        "propertyName": "type",
        "mapping": {
          "response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated",
          "response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
        }
      }
    },
    "OpenAIResponseObjectStreamResponseCompleted": {
      "type": "object",
      "properties": {
        "response": { "$ref": "#/components/schemas/OpenAIResponseObject" },
        "type": { "type": "string", "const": "response.completed", "default": "response.completed" }
      },
      "additionalProperties": false,
      "required": ["response", "type"],
      "title": "OpenAIResponseObjectStreamResponseCompleted"
    },
    "OpenAIResponseObjectStreamResponseCreated": {
      "type": "object",
      "properties": {
        "response": { "$ref": "#/components/schemas/OpenAIResponseObject" },
        "type": { "type": "string", "const": "response.created", "default": "response.created" }
      },
      "additionalProperties": false,
      "required": ["response", "type"],
      "title": "OpenAIResponseObjectStreamResponseCreated"
    },
    "CreateUploadSessionRequest": {
      "type": "object",
      "properties": {
350  docs/_static/llama-stack-spec.yaml  (vendored)
@@ -330,6 +330,39 @@ paths:
            schema:
              $ref: '#/components/schemas/CreateAgentTurnRequest'
        required: true
  /v1/openai/v1/responses:
    post:
      responses:
        '200':
          description: >-
            Runtime representation of an annotated type.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/OpenAIResponseObject'
            text/event-stream:
              schema:
                $ref: '#/components/schemas/OpenAIResponseObjectStream'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
      description: Create a new OpenAI response.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateOpenaiResponseRequest'
        required: true
  /v1/files:
    get:
      responses:

@@ -875,6 +908,36 @@ paths:
          required: true
          schema:
            type: string
  /v1/openai/v1/responses/{id}:
    get:
      responses:
        '200':
          description: An OpenAIResponseObject.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/OpenAIResponseObject'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
      description: Retrieve an OpenAI response by its ID.
      parameters:
        - name: id
          in: path
          description: >-
            The ID of the OpenAI response to retrieve.
          required: true
          schema:
            type: string
  /v1/scoring-functions/{scoring_fn_id}:
    get:
      responses:

@@ -4329,6 +4392,293 @@ components:
        - event_type
        - turn_id
      title: AgentTurnResponseTurnStartPayload
    OpenAIResponseInputMessage:
      type: object
      properties:
        content:
          oneOf:
            - type: string
            - type: array
              items:
                $ref: '#/components/schemas/OpenAIResponseInputMessageContent'
        role:
          oneOf:
            - type: string
              const: system
            - type: string
              const: developer
            - type: string
              const: user
            - type: string
              const: assistant
        type:
          type: string
          const: message
          default: message
      additionalProperties: false
      required:
        - content
        - role
      title: OpenAIResponseInputMessage
    OpenAIResponseInputMessageContent:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
      discriminator:
        propertyName: type
        mapping:
          input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
          input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
    OpenAIResponseInputMessageContentImage:
      type: object
      properties:
        detail:
          oneOf:
            - type: string
              const: low
            - type: string
              const: high
            - type: string
              const: auto
          default: auto
        type:
          type: string
          const: input_image
          default: input_image
        image_url:
          type: string
      additionalProperties: false
      required:
        - detail
        - type
      title: OpenAIResponseInputMessageContentImage
    OpenAIResponseInputMessageContentText:
      type: object
      properties:
        text:
          type: string
        type:
          type: string
          const: input_text
          default: input_text
      additionalProperties: false
      required:
        - text
        - type
      title: OpenAIResponseInputMessageContentText
    OpenAIResponseInputTool:
      type: object
      properties:
        type:
          oneOf:
            - type: string
              const: web_search
            - type: string
              const: web_search_preview_2025_03_11
          default: web_search
        search_context_size:
          type: string
          default: medium
      additionalProperties: false
      required:
        - type
      title: OpenAIResponseInputToolWebSearch
    CreateOpenaiResponseRequest:
      type: object
      properties:
        input:
          oneOf:
            - type: string
            - type: array
              items:
                $ref: '#/components/schemas/OpenAIResponseInputMessage'
          description: Input message(s) to create the response.
        model:
          type: string
          description: The underlying LLM used for completions.
        previous_response_id:
          type: string
          description: >-
            (Optional) if specified, the new response will be a continuation of the
            previous response. This can be used to easily fork-off new responses from
            existing responses.
        store:
          type: boolean
        stream:
          type: boolean
        tools:
          type: array
          items:
            $ref: '#/components/schemas/OpenAIResponseInputTool'
      additionalProperties: false
      required:
        - input
        - model
      title: CreateOpenaiResponseRequest
    OpenAIResponseError:
      type: object
      properties:
        code:
          type: string
        message:
          type: string
      additionalProperties: false
      required:
        - code
        - message
      title: OpenAIResponseError
    OpenAIResponseObject:
      type: object
      properties:
        created_at:
          type: integer
        error:
          $ref: '#/components/schemas/OpenAIResponseError'
        id:
          type: string
        model:
          type: string
        object:
          type: string
          const: response
          default: response
        output:
          type: array
          items:
            $ref: '#/components/schemas/OpenAIResponseOutput'
        parallel_tool_calls:
          type: boolean
          default: false
        previous_response_id:
          type: string
        status:
          type: string
        temperature:
          type: number
        top_p:
          type: number
        truncation:
          type: string
        user:
          type: string
      additionalProperties: false
      required:
        - created_at
        - id
        - model
        - object
        - output
        - parallel_tool_calls
        - status
      title: OpenAIResponseObject
    OpenAIResponseOutput:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseOutputMessage'
        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
      discriminator:
        propertyName: type
        mapping:
          message: '#/components/schemas/OpenAIResponseOutputMessage'
          web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
    OpenAIResponseOutputMessage:
      type: object
      properties:
        id:
          type: string
        content:
          type: array
          items:
            $ref: '#/components/schemas/OpenAIResponseOutputMessageContent'
        role:
          type: string
          const: assistant
          default: assistant
        status:
          type: string
        type:
          type: string
          const: message
          default: message
      additionalProperties: false
      required:
        - id
        - content
        - role
        - status
        - type
      title: OpenAIResponseOutputMessage
    OpenAIResponseOutputMessageContent:
      type: object
      properties:
        text:
          type: string
        type:
          type: string
          const: output_text
          default: output_text
      additionalProperties: false
      required:
        - text
        - type
      title: >-
        OpenAIResponseOutputMessageContentOutputText
    "OpenAIResponseOutputMessageWebSearchToolCall":
      type: object
      properties:
        id:
          type: string
        status:
          type: string
        type:
          type: string
          const: web_search_call
          default: web_search_call
      additionalProperties: false
      required:
        - id
        - status
        - type
      title: >-
        OpenAIResponseOutputMessageWebSearchToolCall
    OpenAIResponseObjectStream:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
      discriminator:
        propertyName: type
        mapping:
          response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
          response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
    "OpenAIResponseObjectStreamResponseCompleted":
      type: object
      properties:
        response:
          $ref: '#/components/schemas/OpenAIResponseObject'
        type:
          type: string
          const: response.completed
          default: response.completed
      additionalProperties: false
      required:
        - response
        - type
      title: >-
        OpenAIResponseObjectStreamResponseCompleted
    "OpenAIResponseObjectStreamResponseCreated":
      type: object
      properties:
        response:
          $ref: '#/components/schemas/OpenAIResponseObject'
        type:
          type: string
          const: response.created
          default: response.created
      additionalProperties: false
      required:
        - response
        - type
      title: >-
        OpenAIResponseObjectStreamResponseCreated
    CreateUploadSessionRequest:
      type: object
      properties:
907  docs/getting_started_llama_api.ipynb  (new file)

File diff suppressed because one or more lines are too long
@@ -179,7 +179,7 @@ class ContentBuilder:
     "Creates the content subtree for a request or response."

     def is_iterator_type(t):
-        return "StreamChunk" in str(t)
+        return "StreamChunk" in str(t) or "OpenAIResponseObjectStream" in str(t)

     def get_media_type(t):
         if is_generic_list(t):
@@ -53,6 +53,13 @@ models:
   provider_id: ollama
   provider_model_id: null
 shields: []
+server:
+  port: 8321
+  auth:
+    provider_type: "kubernetes"
+    config:
+      api_server_url: "https://kubernetes.default.svc"
+      ca_cert_path: "/path/to/ca.crt"
 ```

 Let's break this down into the different sections. The first section specifies the set of APIs that the stack server will serve:
@@ -102,6 +109,105 @@ A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and i

What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.

## Server Configuration

The `server` section configures the HTTP server that serves the Llama Stack APIs:

```yaml
server:
  port: 8321  # Port to listen on (default: 8321)
  tls_certfile: "/path/to/cert.pem"  # Optional: Path to TLS certificate for HTTPS
  tls_keyfile: "/path/to/key.pem"    # Optional: Path to TLS key for HTTPS
  auth:  # Optional: Authentication configuration
    provider_type: "kubernetes"  # Type of auth provider
    config:  # Provider-specific configuration
      api_server_url: "https://kubernetes.default.svc"
      ca_cert_path: "/path/to/ca.crt"  # Optional: Path to CA certificate
```

### Authentication Configuration

The `auth` section configures authentication for the server. When configured, all API requests must include a valid Bearer token in the Authorization header:

```
Authorization: Bearer <token>
```

The server supports multiple authentication providers:

#### Kubernetes Provider

The Kubernetes cluster must be configured to use a service account for authentication.

```bash
kubectl create namespace llama-stack
kubectl create serviceaccount llama-stack-auth -n llama-stack
kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --serviceaccount=llama-stack:llama-stack-auth -n llama-stack
kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token
```

The provider validates tokens against the Kubernetes API server:

```yaml
server:
  auth:
    provider_type: "kubernetes"
    config:
      api_server_url: "https://kubernetes.default.svc"  # URL of the Kubernetes API server
      ca_cert_path: "/path/to/ca.crt"  # Optional: Path to CA certificate
```

The provider extracts user information from the JWT token:
- Username from the `sub` claim becomes a role
- Kubernetes groups become teams

You can easily validate a request by running:

```bash
curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://127.0.0.1:8321/v1/providers
```
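Under the hood, this style of validation is typically done with the Kubernetes TokenReview API. The sketch below is illustrative only (the function name, error handling, and return shape are assumptions, not the provider's actual code):

```python
# Illustrative TokenReview-based check; the real provider implementation
# in llama-stack may differ in structure and error handling.
import httpx


async def validate_k8s_token(api_server_url: str, token: str, ca_cert_path: str | None = None) -> dict:
    """POST a TokenReview to the Kubernetes API server and return the user info."""
    review = {
        "apiVersion": "authentication.k8s.io/v1",
        "kind": "TokenReview",
        "spec": {"token": token},
    }
    async with httpx.AsyncClient(verify=ca_cert_path or True) as client:
        response = await client.post(
            f"{api_server_url}/apis/authentication.k8s.io/v1/tokenreviews",
            json=review,
            headers={"Authorization": f"Bearer {token}"},
        )
    response.raise_for_status()
    status = response.json()["status"]
    if not status.get("authenticated"):
        raise ValueError("Token rejected by the Kubernetes API server")
    # "user" contains "username" (the sub claim) and "groups".
    return status["user"]
```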
#### Custom Provider

Validates tokens against a custom authentication endpoint:

```yaml
server:
  auth:
    provider_type: "custom"
    config:
      endpoint: "https://auth.example.com/validate"  # URL of the auth endpoint
```

The custom endpoint receives a POST request with:

```json
{
  "api_key": "<token>",
  "request": {
    "path": "/api/v1/endpoint",
    "headers": {
      "content-type": "application/json",
      "user-agent": "curl/7.64.1"
    },
    "params": {
      "key": ["value"]
    }
  }
}
```

And must respond with:

```json
{
  "access_attributes": {
    "roles": ["admin", "user"],
    "teams": ["ml-team", "nlp-team"],
    "projects": ["llama-3", "project-x"],
    "namespaces": ["research"]
  },
  "message": "Authentication successful"
}
```

If no access attributes are returned, the token is used as a namespace.
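To make the contract concrete, here is a minimal sketch of a custom endpoint that satisfies it. This is not part of Llama Stack; FastAPI, the `/validate` route, and the in-memory token table are illustrative choices:

```python
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

app = FastAPI()

# Stand-in for a real token store or identity-provider lookup.
KNOWN_TOKENS = {
    "secret-token": {"roles": ["admin"], "teams": ["ml-team"]},
}


class AuthRequest(BaseModel):
    api_key: str
    request: dict  # path, headers, and params as described above


@app.post("/validate")
def validate(body: AuthRequest) -> dict:
    attributes = KNOWN_TOKENS.get(body.api_key)
    if attributes is None:
        # Any non-2xx response causes the middleware to reject the request.
        raise HTTPException(status_code=401, detail="Invalid token")
    return {"access_attributes": attributes, "message": "Authentication successful"}
```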
## Extending to handle Safety

Configuring Safety can be a little involved so it is instructive to go through an example.
@@ -38,6 +38,13 @@ from llama_stack.apis.safety import SafetyViolation
 from llama_stack.apis.tools import ToolDef
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

+from .openai_responses import (
+    OpenAIResponseInputMessage,
+    OpenAIResponseInputTool,
+    OpenAIResponseObject,
+    OpenAIResponseObjectStream,
+)
+

 class Attachment(BaseModel):
     """An attachment to an agent turn.

@@ -593,3 +600,39 @@ class Agents(Protocol):
         :returns: A ListAgentSessionsResponse.
         """
         ...
+
+    # We situate the OpenAI Responses API in the Agents API just like we did things
+    # for Inference. The Responses API, in its intent, serves the same purpose as
+    # the Agents API above -- it is essentially a lightweight "agentic loop" with
+    # integrated tool calling.
+    #
+    # Both of these APIs are inherently stateful.
+
+    @webmethod(route="/openai/v1/responses/{id}", method="GET")
+    async def get_openai_response(
+        self,
+        id: str,
+    ) -> OpenAIResponseObject:
+        """Retrieve an OpenAI response by its ID.
+
+        :param id: The ID of the OpenAI response to retrieve.
+        :returns: An OpenAIResponseObject.
+        """
+        ...
+
+    @webmethod(route="/openai/v1/responses", method="POST")
+    async def create_openai_response(
+        self,
+        input: Union[str, List[OpenAIResponseInputMessage]],
+        model: str,
+        previous_response_id: Optional[str] = None,
+        store: Optional[bool] = True,
+        stream: Optional[bool] = False,
+        tools: Optional[List[OpenAIResponseInputTool]] = None,
+    ) -> Union[OpenAIResponseObject, AsyncIterator[OpenAIResponseObjectStream]]:
+        """Create a new OpenAI response.
+
+        :param input: Input message(s) to create the response.
+        :param model: The underlying LLM used for completions.
+        :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
+        """
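As a rough sketch of how the two routes compose, a direct HTTP caller could do the following. The host, port, and model name are placeholders taken from the workflows above, and a `stream: true` request would instead require consuming the `text/event-stream` body:

```python
import httpx

BASE = "http://localhost:8321/v1/openai/v1"

# POST /openai/v1/responses with the two required fields, "input" and "model".
created = httpx.post(
    f"{BASE}/responses",
    json={"model": "meta-llama/Llama-3.2-3B-Instruct", "input": "Say hello"},
).json()

# GET /openai/v1/responses/{id} retrieves the stored response by its ID.
fetched = httpx.get(f"{BASE}/responses/{created['id']}").json()
assert fetched["object"] == "response"
for output in fetched["output"]:
    print(output)
```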
140  llama_stack/apis/agents/openai_responses.py  (new file)
@@ -0,0 +1,140 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import List, Literal, Optional, Union

from pydantic import BaseModel, Field
from typing_extensions import Annotated

from llama_stack.schema_utils import json_schema_type, register_schema


@json_schema_type
class OpenAIResponseError(BaseModel):
    code: str
    message: str


@json_schema_type
class OpenAIResponseOutputMessageContentOutputText(BaseModel):
    text: str
    type: Literal["output_text"] = "output_text"


OpenAIResponseOutputMessageContent = Annotated[
    Union[OpenAIResponseOutputMessageContentOutputText,],
    Field(discriminator="type"),
]
register_schema(OpenAIResponseOutputMessageContent, name="OpenAIResponseOutputMessageContent")


@json_schema_type
class OpenAIResponseOutputMessage(BaseModel):
    id: str
    content: List[OpenAIResponseOutputMessageContent]
    role: Literal["assistant"] = "assistant"
    status: str
    type: Literal["message"] = "message"


@json_schema_type
class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
    id: str
    status: str
    type: Literal["web_search_call"] = "web_search_call"


OpenAIResponseOutput = Annotated[
    Union[
        OpenAIResponseOutputMessage,
        OpenAIResponseOutputMessageWebSearchToolCall,
    ],
    Field(discriminator="type"),
]
register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")


@json_schema_type
class OpenAIResponseObject(BaseModel):
    created_at: int
    error: Optional[OpenAIResponseError] = None
    id: str
    model: str
    object: Literal["response"] = "response"
    output: List[OpenAIResponseOutput]
    parallel_tool_calls: bool = False
    previous_response_id: Optional[str] = None
    status: str
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    truncation: Optional[str] = None
    user: Optional[str] = None


@json_schema_type
class OpenAIResponseObjectStreamResponseCreated(BaseModel):
    response: OpenAIResponseObject
    type: Literal["response.created"] = "response.created"


@json_schema_type
class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
    response: OpenAIResponseObject
    type: Literal["response.completed"] = "response.completed"


OpenAIResponseObjectStream = Annotated[
    Union[
        OpenAIResponseObjectStreamResponseCreated,
        OpenAIResponseObjectStreamResponseCompleted,
    ],
    Field(discriminator="type"),
]
register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")


@json_schema_type
class OpenAIResponseInputMessageContentText(BaseModel):
    text: str
    type: Literal["input_text"] = "input_text"


@json_schema_type
class OpenAIResponseInputMessageContentImage(BaseModel):
    detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
    type: Literal["input_image"] = "input_image"
    # TODO: handle file_id
    image_url: Optional[str] = None


# TODO: handle file content types
OpenAIResponseInputMessageContent = Annotated[
    Union[OpenAIResponseInputMessageContentText, OpenAIResponseInputMessageContentImage],
    Field(discriminator="type"),
]
register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")


@json_schema_type
class OpenAIResponseInputMessage(BaseModel):
    content: Union[str, List[OpenAIResponseInputMessageContent]]
    role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
    type: Optional[Literal["message"]] = "message"


@json_schema_type
class OpenAIResponseInputToolWebSearch(BaseModel):
    type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search"
    # TODO: actually use search_context_size somewhere...
    search_context_size: Optional[str] = Field(default="medium", pattern="^low|medium|high$")
    # TODO: add user_location


OpenAIResponseInputTool = Annotated[
    Union[OpenAIResponseInputToolWebSearch,],
    Field(discriminator="type"),
]
register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")
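A quick illustration (not part of the commit) of how these discriminated unions behave in Pydantic v2: the `type` literal selects the concrete class when a dump is re-validated.

```python
from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseObject,
    OpenAIResponseOutputMessage,
    OpenAIResponseOutputMessageContentOutputText,
)

msg = OpenAIResponseOutputMessage(
    id="msg_1",
    content=[OpenAIResponseOutputMessageContentOutputText(text="hello")],
    status="completed",
)
obj = OpenAIResponseObject(
    created_at=0,
    id="resp_1",
    model="example-model",
    output=[msg],
    status="completed",
)

# Round-tripping through a plain dict restores the same concrete output class,
# because Field(discriminator="type") dispatches on the "type" literal.
round_tripped = OpenAIResponseObject.model_validate(obj.model_dump())
assert isinstance(round_tripped.output[0], OpenAIResponseOutputMessage)
```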
@@ -460,15 +460,17 @@ def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
     from llama_stack.models.llama.sku_list import llama_meta_net_info, resolve_model

     from .model.safety_models import (
-        prompt_guard_download_info,
-        prompt_guard_model_sku,
+        prompt_guard_download_info_map,
+        prompt_guard_model_sku_map,
     )

-    prompt_guard = prompt_guard_model_sku()
+    prompt_guard_model_sku_map = prompt_guard_model_sku_map()
+    prompt_guard_download_info_map = prompt_guard_download_info_map()
+
     for model_id in model_ids:
-        if model_id == prompt_guard.model_id:
-            model = prompt_guard
-            info = prompt_guard_download_info()
+        if model_id in prompt_guard_model_sku_map.keys():
+            model = prompt_guard_model_sku_map[model_id]
+            info = prompt_guard_download_info_map[model_id]
         else:
             model = resolve_model(model_id)
             if model is None:
@@ -36,11 +36,11 @@ class ModelDescribe(Subcommand):
         )

     def _run_model_describe_cmd(self, args: argparse.Namespace) -> None:
-        from .safety_models import prompt_guard_model_sku
+        from .safety_models import prompt_guard_model_sku_map

-        prompt_guard = prompt_guard_model_sku()
-        if args.model_id == prompt_guard.model_id:
-            model = prompt_guard
+        prompt_guard_model_map = prompt_guard_model_sku_map()
+        if args.model_id in prompt_guard_model_map.keys():
+            model = prompt_guard_model_map[args.model_id]
         else:
             model = resolve_model(args.model_id)
@@ -84,7 +84,7 @@ class ModelList(Subcommand):
         )

     def _run_model_list_cmd(self, args: argparse.Namespace) -> None:
-        from .safety_models import prompt_guard_model_sku
+        from .safety_models import prompt_guard_model_skus

         if args.downloaded:
             return _run_model_list_downloaded_cmd()

@@ -96,7 +96,7 @@ class ModelList(Subcommand):
         ]

         rows = []
-        for model in all_registered_models() + [prompt_guard_model_sku()]:
+        for model in all_registered_models() + prompt_guard_model_skus():
             if not args.show_all and not model.is_featured:
                 continue
@@ -42,11 +42,12 @@ class ModelRemove(Subcommand):
         )

     def _run_model_remove_cmd(self, args: argparse.Namespace) -> None:
-        from .safety_models import prompt_guard_model_sku
+        from .safety_models import prompt_guard_model_sku_map

-        prompt_guard = prompt_guard_model_sku()
-        if args.model == prompt_guard.model_id:
-            model = prompt_guard
+        prompt_guard_model_map = prompt_guard_model_sku_map()
+
+        if args.model in prompt_guard_model_map.keys():
+            model = prompt_guard_model_map[args.model]
         else:
             model = resolve_model(args.model)
@@ -15,11 +15,11 @@ from llama_stack.models.llama.sku_types import CheckpointQuantizationFormat
 class PromptGuardModel(BaseModel):
     """Make a 'fake' Model-like object for Prompt Guard. Eventually this will be removed."""

-    model_id: str = "Prompt-Guard-86M"
+    model_id: str
+    huggingface_repo: str
     description: str = "Prompt Guard. NOTE: this model will not be provided via `llama` CLI soon."
     is_featured: bool = False
-    huggingface_repo: str = "meta-llama/Prompt-Guard-86M"
-    max_seq_length: int = 2048
+    max_seq_length: int = 512
     is_instruct_model: bool = False
     quantization_format: CheckpointQuantizationFormat = CheckpointQuantizationFormat.bf16
     arch_args: Dict[str, Any] = Field(default_factory=dict)

@@ -30,13 +30,28 @@ class PromptGuardModel(BaseModel):
     model_config = ConfigDict(protected_namespaces=())


-def prompt_guard_model_sku():
-    return PromptGuardModel()
+def prompt_guard_model_skus():
+    return [
+        PromptGuardModel(model_id="Prompt-Guard-86M", huggingface_repo="meta-llama/Prompt-Guard-86M"),
+        PromptGuardModel(
+            model_id="Llama-Prompt-Guard-2-86M",
+            huggingface_repo="meta-llama/Llama-Prompt-Guard-2-86M",
+        ),
+        PromptGuardModel(
+            model_id="Llama-Prompt-Guard-2-22M",
+            huggingface_repo="meta-llama/Llama-Prompt-Guard-2-22M",
+        ),
+    ]


-def prompt_guard_download_info():
-    return LlamaDownloadInfo(
-        folder="Prompt-Guard",
+def prompt_guard_model_sku_map() -> Dict[str, Any]:
+    return {model.model_id: model for model in prompt_guard_model_skus()}
+
+
+def prompt_guard_download_info_map() -> Dict[str, LlamaDownloadInfo]:
+    return {
+        model.model_id: LlamaDownloadInfo(
+            folder="Prompt-Guard" if model.model_id == "Prompt-Guard-86M" else model.model_id,
             files=[
                 "model.safetensors",
                 "special_tokens_map.json",

@@ -45,3 +60,5 @@ def prompt_guard_download_info():
             ],
             pth_size=1,
         )
+        for model in prompt_guard_model_skus()
+    }
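The CLI changes above (download, describe, remove) all follow the same lookup pattern against these maps; schematically, and assuming the module path `llama_stack.cli.model.safety_models`:

```python
from llama_stack.cli.model.safety_models import (
    prompt_guard_download_info_map,
    prompt_guard_model_sku_map,
)

sku_map = prompt_guard_model_sku_map()
info_map = prompt_guard_download_info_map()

# Any of the three Prompt Guard SKUs resolves through the same dict lookup.
if "Llama-Prompt-Guard-2-86M" in sku_map:
    model = sku_map["Llama-Prompt-Guard-2-86M"]
    download_info = info_map[model.model_id]  # folder, files, pth_size
```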
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from enum import Enum
 from typing import Annotated, Any, Dict, List, Optional, Union

 from pydantic import BaseModel, Field

@@ -235,10 +236,21 @@ class LoggingConfig(BaseModel):
     )


+class AuthProviderType(str, Enum):
+    """Supported authentication provider types."""
+
+    KUBERNETES = "kubernetes"
+    CUSTOM = "custom"
+
+
 class AuthenticationConfig(BaseModel):
-    endpoint: str = Field(
+    provider_type: AuthProviderType = Field(
         ...,
-        description="Endpoint URL to validate authentication tokens",
+        description="Type of authentication provider (e.g., 'kubernetes', 'custom')",
     )
+    config: Dict[str, str] = Field(
+        ...,
+        description="Provider-specific configuration",
+    )
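For reference, this is the programmatic equivalent of the run.yaml `server.auth` block used in the workflow and docs above (values are placeholders, and the import path is assumed from the auth middleware's own imports):

```python
from llama_stack.distribution.datatypes import AuthenticationConfig, AuthProviderType

auth = AuthenticationConfig(
    provider_type=AuthProviderType.KUBERNETES,
    config={
        "api_server_url": "https://kubernetes.default.svc",
        "ca_cert_path": "/path/to/ca.crt",
    },
)
```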

@@ -5,74 +5,29 @@
# the root directory of this source tree.

import json
from typing import Dict, List, Optional
from urllib.parse import parse_qs

import httpx
from pydantic import BaseModel, Field

from llama_stack.distribution.datatypes import AccessAttributes
from llama_stack.distribution.server.auth_providers import AuthProviderConfig, create_auth_provider
from llama_stack.log import get_logger

logger = get_logger(name=__name__, category="auth")


class AuthRequestContext(BaseModel):
    path: str = Field(description="The path of the request being authenticated")

    headers: Dict[str, str] = Field(description="HTTP headers from the original request (excluding Authorization)")

    params: Dict[str, List[str]] = Field(
        description="Query parameters from the original request, parsed as dictionary of lists"
    )


class AuthRequest(BaseModel):
    api_key: str = Field(description="The API key extracted from the Authorization header")

    request: AuthRequestContext = Field(description="Context information about the request being authenticated")


class AuthResponse(BaseModel):
    """The format of the authentication response from the auth endpoint."""

    access_attributes: Optional[AccessAttributes] = Field(
        default=None,
        description="""
        Structured user attributes for attribute-based access control.

        These attributes determine which resources the user can access.
        The model provides standard categories like "roles", "teams", "projects", and "namespaces".
        Each attribute category contains a list of values that the user has for that category.
        During access control checks, these values are compared against resource requirements.

        Example with standard categories:
        ```json
        {
            "roles": ["admin", "data-scientist"],
            "teams": ["ml-team"],
            "projects": ["llama-3"],
            "namespaces": ["research"]
        }
        ```
        """,
    )

    message: Optional[str] = Field(
        default=None, description="Optional message providing additional context about the authentication result."
    )


class AuthenticationMiddleware:
    """Middleware that authenticates requests using an external auth endpoint.
    """Middleware that authenticates requests using configured authentication provider.

    This middleware:
    1. Extracts the Bearer token from the Authorization header
    2. Sends it to the configured auth endpoint along with request details
    3. Validates the response and extracts user attributes
    2. Uses the configured auth provider to validate the token
    3. Extracts user attributes from the provider's response
    4. Makes these attributes available to the route handlers for access control

    Authentication Request Format:
    The middleware supports multiple authentication providers through the AuthProvider interface:
    - Kubernetes: Validates tokens against the Kubernetes API server
    - Custom: Validates tokens against a custom endpoint

    Authentication Request Format for Custom Auth Provider:
    ```json
    {
        "api_key": "the-api-key-extracted-from-auth-header",

@@ -105,21 +60,26 @@ class AuthenticationMiddleware:
    }
    ```

    Token Validation:
    Each provider implements its own token validation logic:
    - Kubernetes: Uses TokenReview API to validate service account tokens
    - Custom: Sends token to custom endpoint for validation

    Attribute-Based Access Control:
    The attributes returned by the auth endpoint are used to determine which
    The attributes returned by the auth provider are used to determine which
    resources the user can access. Resources can specify required attributes
    using the access_attributes field. For a user to access a resource:

    1. All attribute categories specified in the resource must be present in the user's attributes
    2. For each category, the user must have at least one matching value

    If the auth endpoint doesn't return any attributes, the user will only be able to
    If the auth provider doesn't return any attributes, the user will only be able to
    access resources that don't have access_attributes defined.
    """
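The two rules above can be read as a small predicate. This is an illustrative sketch of the check described in the docstring, not the repo's implementation:

```python
from typing import Dict, List, Optional


def user_can_access(user_attrs: Dict[str, List[str]], required: Optional[Dict[str, List[str]]]) -> bool:
    """Sketch of the ABAC rule: every required category must be present, with an overlap."""
    if not required:
        # resources without access_attributes are accessible to any authenticated user
        return True
    for category, allowed_values in required.items():
        user_values = user_attrs.get(category, [])
        # rule 1: category must exist on the user; rule 2: at least one value must match
        if not any(v in user_values for v in allowed_values):
            return False
    return True


assert user_can_access({"roles": ["admin"]}, {"roles": ["admin", "ops"]})
assert not user_can_access({"roles": ["viewer"]}, {"teams": ["ml-team"]})
```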

    def __init__(self, app, auth_endpoint):
    def __init__(self, app, auth_config: AuthProviderConfig):
        self.app = app
        self.auth_endpoint = auth_endpoint
        self.auth_provider = create_auth_provider(auth_config)

    async def __call__(self, scope, receive, send):
        if scope["type"] == "http":
@@ -129,66 +89,34 @@
            if not auth_header or not auth_header.startswith("Bearer "):
                return await self._send_auth_error(send, "Missing or invalid Authorization header")

            api_key = auth_header.split("Bearer ", 1)[1]
            token = auth_header.split("Bearer ", 1)[1]

            path = scope.get("path", "")
            request_headers = {k.decode(): v.decode() for k, v in headers.items()}

            # Remove sensitive headers
            if "authorization" in request_headers:
                del request_headers["authorization"]

            query_string = scope.get("query_string", b"").decode()
            params = parse_qs(query_string)

            # Build the auth request model
            auth_request = AuthRequest(
                api_key=api_key,
                request=AuthRequestContext(
                    path=path,
                    headers=request_headers,
                    params=params,
                ),
            )

            # Validate with authentication endpoint
            # Validate token and get access attributes
            try:
                async with httpx.AsyncClient() as client:
                    response = await client.post(
                        self.auth_endpoint,
                        json=auth_request.model_dump(),
                        timeout=10.0,  # Add a reasonable timeout
                    )
                    if response.status_code != 200:
                        logger.warning(f"Authentication failed: {response.status_code}")
                        return await self._send_auth_error(send, "Authentication failed")

                    # Parse and validate the auth response
                    try:
                        response_data = response.json()
                        auth_response = AuthResponse(**response_data)

                        # Store attributes in request scope for access control
                        if auth_response.access_attributes:
                            user_attributes = auth_response.access_attributes.model_dump(exclude_none=True)
                        else:
                            logger.warning("No access attributes, setting namespace to api_key by default")
                            user_attributes = {
                                "namespaces": [api_key],
                            }

                        scope["user_attributes"] = user_attributes
                        logger.debug(f"Authentication successful: {len(user_attributes)} attributes")
                    except Exception:
                        logger.exception("Error parsing authentication response")
                        return await self._send_auth_error(send, "Invalid authentication response format")
                access_attributes = await self.auth_provider.validate_token(token, scope)
            except httpx.TimeoutException:
                logger.exception("Authentication request timed out")
                return await self._send_auth_error(send, "Authentication service timeout")
            except ValueError as e:
                logger.exception("Error during authentication")
                return await self._send_auth_error(send, str(e))
            except Exception:
                logger.exception("Error during authentication")
                return await self._send_auth_error(send, "Authentication service error")

            # Store attributes in request scope for access control
            if access_attributes:
                user_attributes = access_attributes.model_dump(exclude_none=True)
            else:
                logger.warning("No access attributes, setting namespace to token by default")
                user_attributes = {
                    "namespaces": [token],
                }

            # Store attributes in request scope
            scope["user_attributes"] = user_attributes
            logger.debug(f"Authentication successful: {len(scope['user_attributes'])} attributes")

        return await self.app(scope, receive, send)

    async def _send_auth_error(self, send, message):

262
llama_stack/distribution/server/auth_providers.py
Normal file

@@ -0,0 +1,262 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import json
from abc import ABC, abstractmethod
from enum import Enum
from typing import Dict, List, Optional
from urllib.parse import parse_qs

import httpx
from pydantic import BaseModel, Field

from llama_stack.distribution.datatypes import AccessAttributes
from llama_stack.log import get_logger

logger = get_logger(name=__name__, category="auth")


class AuthResponse(BaseModel):
    """The format of the authentication response from the auth endpoint."""

    access_attributes: Optional[AccessAttributes] = Field(
        default=None,
        description="""
        Structured user attributes for attribute-based access control.

        These attributes determine which resources the user can access.
        The model provides standard categories like "roles", "teams", "projects", and "namespaces".
        Each attribute category contains a list of values that the user has for that category.
        During access control checks, these values are compared against resource requirements.

        Example with standard categories:
        ```json
        {
            "roles": ["admin", "data-scientist"],
            "teams": ["ml-team"],
            "projects": ["llama-3"],
            "namespaces": ["research"]
        }
        ```
        """,
    )

    message: Optional[str] = Field(
        default=None, description="Optional message providing additional context about the authentication result."
    )


class AuthRequestContext(BaseModel):
    path: str = Field(description="The path of the request being authenticated")

    headers: Dict[str, str] = Field(description="HTTP headers from the original request (excluding Authorization)")

    params: Dict[str, List[str]] = Field(
        description="Query parameters from the original request, parsed as dictionary of lists"
    )


class AuthRequest(BaseModel):
    api_key: str = Field(description="The API key extracted from the Authorization header")

    request: AuthRequestContext = Field(description="Context information about the request being authenticated")


class AuthProviderType(str, Enum):
    """Supported authentication provider types."""

    KUBERNETES = "kubernetes"
    CUSTOM = "custom"


class AuthProviderConfig(BaseModel):
    """Base configuration for authentication providers."""

    provider_type: AuthProviderType = Field(..., description="Type of authentication provider")
    config: Dict[str, str] = Field(..., description="Provider-specific configuration")


class AuthProvider(ABC):
    """Abstract base class for authentication providers."""

    @abstractmethod
    async def validate_token(self, token: str, scope: Optional[Dict] = None) -> Optional[AccessAttributes]:
        """Validate a token and return access attributes."""
        pass

    @abstractmethod
    async def close(self):
        """Clean up any resources."""
        pass


class KubernetesAuthProvider(AuthProvider):
    """Kubernetes authentication provider that validates tokens against the Kubernetes API server."""

    def __init__(self, config: Dict[str, str]):
        self.api_server_url = config["api_server_url"]
        self.ca_cert_path = config.get("ca_cert_path")
        self._client = None

    async def _get_client(self):
        """Get or create a Kubernetes client."""
        if self._client is None:
            # kubernetes-client has no async support, see:
            # https://github.com/kubernetes-client/python/issues/323
            from kubernetes import client
            from kubernetes.client import ApiClient

            # Configure the client
            configuration = client.Configuration()
            configuration.host = self.api_server_url
            if self.ca_cert_path:
                configuration.ssl_ca_cert = self.ca_cert_path
            configuration.verify_ssl = bool(self.ca_cert_path)

            # Create API client
            self._client = ApiClient(configuration)
        return self._client

    async def validate_token(self, token: str, scope: Optional[Dict] = None) -> Optional[AccessAttributes]:
        """Validate a Kubernetes token and return access attributes."""
        try:
            client = await self._get_client()

            # Set the token in the client
            client.set_default_header("Authorization", f"Bearer {token}")

            # Make a request to validate the token
            # We use the /api endpoint which requires authentication
            from kubernetes.client import CoreV1Api

            api = CoreV1Api(client)
            api.get_api_resources(_request_timeout=3.0)  # Set timeout for this specific request

            # If we get here, the token is valid
            # Extract user info from the token claims
            import base64

            # Decode the token (without verification since we've already validated it)
            token_parts = token.split(".")
            payload = json.loads(base64.b64decode(token_parts[1] + "=" * (-len(token_parts[1]) % 4)))

            # Extract user information from the token
            username = payload.get("sub", "")
            groups = payload.get("groups", [])

            return AccessAttributes(
                roles=[username],  # Use username as a role
                teams=groups,  # Use Kubernetes groups as teams
            )

        except Exception as e:
            logger.exception("Failed to validate Kubernetes token")
            raise ValueError("Invalid or expired token") from e
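The padding expression above is easy to miss: base64url segments in a JWT drop their trailing `=` padding, so `"=" * (-len(s) % 4)` restores it before decoding. A standalone variant of that decode step (using `urlsafe_b64decode`, since JWT segments are base64url-encoded; the diff's code uses plain `b64decode`):

```python
import base64
import json


def decode_jwt_payload(token: str) -> dict:
    """Decode the (already-validated) JWT payload without signature verification."""
    payload_b64 = token.split(".")[1]
    payload_b64 += "=" * (-len(payload_b64) % 4)  # restore stripped base64 padding
    return json.loads(base64.urlsafe_b64decode(payload_b64))
```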

    async def close(self):
        """Close the HTTP client."""
        if self._client:
            self._client.close()
            self._client = None


class CustomAuthProvider(AuthProvider):
    """Custom authentication provider that uses an external endpoint."""

    def __init__(self, config: Dict[str, str]):
        self.endpoint = config["endpoint"]
        self._client = None

    async def validate_token(self, token: str, scope: Optional[Dict] = None) -> Optional[AccessAttributes]:
        """Validate a token using the custom authentication endpoint."""
        if not self.endpoint:
            raise ValueError("Authentication endpoint not configured")

        if scope is None:
            scope = {}

        headers = dict(scope.get("headers", []))
        path = scope.get("path", "")
        request_headers = {k.decode(): v.decode() for k, v in headers.items()}

        # Remove sensitive headers
        if "authorization" in request_headers:
            del request_headers["authorization"]

        query_string = scope.get("query_string", b"").decode()
        params = parse_qs(query_string)

        # Build the auth request model
        auth_request = AuthRequest(
            api_key=token,
            request=AuthRequestContext(
                path=path,
                headers=request_headers,
                params=params,
            ),
        )

        # Validate with authentication endpoint
        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    self.endpoint,
                    json=auth_request.model_dump(),
                    timeout=10.0,  # Add a reasonable timeout
                )
                if response.status_code != 200:
                    logger.warning(f"Authentication failed with status code: {response.status_code}")
                    raise ValueError(f"Authentication failed: {response.status_code}")

                # Parse and validate the auth response
                try:
                    response_data = response.json()
                    auth_response = AuthResponse(**response_data)

                    # Store attributes in request scope for access control
                    if auth_response.access_attributes:
                        return auth_response.access_attributes
                    else:
                        logger.warning("No access attributes, setting namespace to api_key by default")
                        user_attributes = {
                            "namespaces": [token],
                        }

                    scope["user_attributes"] = user_attributes
                    logger.debug(f"Authentication successful: {len(user_attributes)} attributes")
                    return auth_response.access_attributes
                except Exception as e:
                    logger.exception("Error parsing authentication response")
                    raise ValueError("Invalid authentication response format") from e

        except httpx.TimeoutException:
            logger.exception("Authentication request timed out")
            raise
        except ValueError:
            # Re-raise ValueError exceptions to preserve their message
            raise
        except Exception as e:
            logger.exception("Error during authentication")
            raise ValueError("Authentication service error") from e

    async def close(self):
        """Close the HTTP client."""
        if self._client:
            await self._client.aclose()
            self._client = None


def create_auth_provider(config: AuthProviderConfig) -> AuthProvider:
    """Factory function to create the appropriate auth provider."""
    provider_type = config.provider_type.lower()

    if provider_type == "kubernetes":
        return KubernetesAuthProvider(config.config)
    elif provider_type == "custom":
        return CustomAuthProvider(config.config)
    else:
        supported_providers = ", ".join([t.value for t in AuthProviderType])
        raise ValueError(f"Unsupported auth provider type: {provider_type}. Supported types are: {supported_providers}")
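Hypothetical wiring, mirroring how the middleware consumes this factory; the endpoint URL is a placeholder, and the names come from the file above:

```python
# Sketch: construct a custom provider and validate a token out-of-band.
config = AuthProviderConfig(
    provider_type=AuthProviderType.CUSTOM,
    config={"endpoint": "https://auth.example.com/validate"},  # placeholder URL
)
provider = create_auth_provider(config)
# In an async context:
#     attrs = await provider.validate_token("some-bearer-token")
# returns AccessAttributes on success, or raises ValueError on rejection.
```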
@@ -419,9 +419,9 @@ def main(args: Optional[argparse.Namespace] = None):
        app.add_middleware(ClientVersionMiddleware)

    # Add authentication middleware if configured
    if config.server.auth and config.server.auth.endpoint:
        logger.info(f"Enabling authentication with endpoint: {config.server.auth.endpoint}")
        app.add_middleware(AuthenticationMiddleware, auth_endpoint=config.server.auth.endpoint)
    if config.server.auth:
        logger.info(f"Enabling authentication with provider: {config.server.auth.provider_type.value}")
        app.add_middleware(AuthenticationMiddleware, auth_config=config.server.auth)

    try:
        impls = asyncio.run(construct_stack(config))

@@ -94,12 +94,16 @@ def tool_chat_page():
    st.subheader("Agent Configurations")
    st.subheader("Agent Type")
    agent_type = st.radio(
        "Select Agent Type",
        [AgentType.REGULAR, AgentType.REACT],
        format_func=lambda x: x.value,
        label="Select Agent Type",
        options=["Regular", "ReAct"],
        on_change=reset_agent,
    )

    if agent_type == "ReAct":
        agent_type = AgentType.REACT
    else:
        agent_type = AgentType.REGULAR

    max_tokens = st.slider(
        "Max Tokens",
        min_value=0,

@@ -792,6 +792,13 @@ def llama3_3_instruct_models() -> List[Model]:
@lru_cache
def safety_models() -> List[Model]:
    return [
        Model(
            core_model_id=CoreModelId.llama_guard_4_12b,
            description="Llama Guard v4 12b system safety model",
            huggingface_repo="meta-llama/Llama-Guard-4-12B",
            arch_args={},
            pth_file_count=1,
        ),
        Model(
            core_model_id=CoreModelId.llama_guard_3_11b_vision,
            description="Llama Guard v3 11b vision system safety model",

@@ -81,6 +81,7 @@ class CoreModelId(Enum):
    llama_guard_2_8b = "Llama-Guard-2-8B"
    llama_guard_3_11b_vision = "Llama-Guard-3-11B-Vision"
    llama_guard_3_1b = "Llama-Guard-3-1B"
    llama_guard_4_12b = "Llama-Guard-4-12B"


def is_multimodal(model_id) -> bool:

@@ -148,6 +149,7 @@ def model_family(model_id) -> ModelFamily:
        CoreModelId.llama_guard_2_8b,
        CoreModelId.llama_guard_3_11b_vision,
        CoreModelId.llama_guard_3_1b,
        CoreModelId.llama_guard_4_12b,
    ]:
        return ModelFamily.safety
    else:

@@ -225,5 +227,7 @@ class Model(BaseModel):
        CoreModelId.llama_guard_3_1b,
    ]:
        return 131072
    elif self.core_model_id == CoreModelId.llama_guard_4_12b:
        return 8192
    else:
        raise ValueError(f"Unknown max_seq_len for {self.core_model_id}")

@@ -23,6 +23,9 @@ from llama_stack.apis.agents import (
    Document,
    ListAgentSessionsResponse,
    ListAgentsResponse,
    OpenAIResponseInputMessage,
    OpenAIResponseInputTool,
    OpenAIResponseObject,
    Session,
    Turn,
)

@@ -40,6 +43,7 @@ from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_imp

from .agent_instance import ChatAgent
from .config import MetaReferenceAgentsImplConfig
from .openai_responses import OpenAIResponsesImpl

logger = logging.getLogger()
logger.setLevel(logging.INFO)

@@ -63,9 +67,16 @@ class MetaReferenceAgentsImpl(Agents):
        self.tool_groups_api = tool_groups_api

        self.in_memory_store = InmemoryKVStoreImpl()
        self.openai_responses_impl = None

    async def initialize(self) -> None:
        self.persistence_store = await kvstore_impl(self.config.persistence_store)
        self.openai_responses_impl = OpenAIResponsesImpl(
            self.persistence_store,
            inference_api=self.inference_api,
            tool_groups_api=self.tool_groups_api,
            tool_runtime_api=self.tool_runtime_api,
        )

        # check if "bwrap" is available
        if not shutil.which("bwrap"):

@@ -244,3 +255,23 @@ class MetaReferenceAgentsImpl(Agents):
        agent_id: str,
    ) -> ListAgentSessionsResponse:
        pass

    # OpenAI responses
    async def get_openai_response(
        self,
        id: str,
    ) -> OpenAIResponseObject:
        return await self.openai_responses_impl.get_openai_response(id)

    async def create_openai_response(
        self,
        input: Union[str, List[OpenAIResponseInputMessage]],
        model: str,
        previous_response_id: Optional[str] = None,
        store: Optional[bool] = True,
        stream: Optional[bool] = False,
        tools: Optional[List[OpenAIResponseInputTool]] = None,
    ) -> OpenAIResponseObject:
        return await self.openai_responses_impl.create_openai_response(
            input, model, previous_response_id, store, stream, tools
        )

@@ -0,0 +1,319 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import json
import uuid
from typing import AsyncIterator, List, Optional, Union, cast

from openai.types.chat import ChatCompletionToolParam

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseInputMessage,
    OpenAIResponseInputMessageContentImage,
    OpenAIResponseInputMessageContentText,
    OpenAIResponseInputTool,
    OpenAIResponseObject,
    OpenAIResponseObjectStream,
    OpenAIResponseObjectStreamResponseCompleted,
    OpenAIResponseObjectStreamResponseCreated,
    OpenAIResponseOutput,
    OpenAIResponseOutputMessage,
    OpenAIResponseOutputMessageContentOutputText,
    OpenAIResponseOutputMessageWebSearchToolCall,
)
from llama_stack.apis.inference.inference import (
    Inference,
    OpenAIAssistantMessageParam,
    OpenAIChatCompletion,
    OpenAIChatCompletionContentPartImageParam,
    OpenAIChatCompletionContentPartParam,
    OpenAIChatCompletionContentPartTextParam,
    OpenAIChatCompletionToolCallFunction,
    OpenAIChoice,
    OpenAIImageURL,
    OpenAIMessageParam,
    OpenAIToolMessageParam,
    OpenAIUserMessageParam,
)
from llama_stack.apis.tools.tools import ToolGroups, ToolInvocationResult, ToolRuntime
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
from llama_stack.providers.utils.kvstore import KVStore

logger = get_logger(name=__name__, category="openai_responses")

OPENAI_RESPONSES_PREFIX = "openai_responses:"


async def _previous_response_to_messages(previous_response: OpenAIResponseObject) -> List[OpenAIMessageParam]:
    messages: List[OpenAIMessageParam] = []
    for output_message in previous_response.output:
        if isinstance(output_message, OpenAIResponseOutputMessage):
            messages.append(OpenAIAssistantMessageParam(content=output_message.content[0].text))
    return messages


async def _openai_choices_to_output_messages(choices: List[OpenAIChoice]) -> List[OpenAIResponseOutputMessage]:
    output_messages = []
    for choice in choices:
        output_content = ""
        if isinstance(choice.message.content, str):
            output_content = choice.message.content
        elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam):
            output_content = choice.message.content.text
        # TODO: handle image content
        output_messages.append(
            OpenAIResponseOutputMessage(
                id=f"msg_{uuid.uuid4()}",
                content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)],
                status="completed",
            )
        )
    return output_messages


class OpenAIResponsesImpl:
    def __init__(
        self,
        persistence_store: KVStore,
        inference_api: Inference,
        tool_groups_api: ToolGroups,
        tool_runtime_api: ToolRuntime,
    ):
        self.persistence_store = persistence_store
        self.inference_api = inference_api
        self.tool_groups_api = tool_groups_api
        self.tool_runtime_api = tool_runtime_api

    async def get_openai_response(
        self,
        id: str,
    ) -> OpenAIResponseObject:
        key = f"{OPENAI_RESPONSES_PREFIX}{id}"
        response_json = await self.persistence_store.get(key=key)
        if response_json is None:
            raise ValueError(f"OpenAI response with id '{id}' not found")
        return OpenAIResponseObject.model_validate_json(response_json)

    async def create_openai_response(
        self,
        input: Union[str, List[OpenAIResponseInputMessage]],
        model: str,
        previous_response_id: Optional[str] = None,
        store: Optional[bool] = True,
        stream: Optional[bool] = False,
        tools: Optional[List[OpenAIResponseInputTool]] = None,
    ):
        stream = False if stream is None else stream

        messages: List[OpenAIMessageParam] = []
        if previous_response_id:
            previous_response = await self.get_openai_response(previous_response_id)
            messages.extend(await _previous_response_to_messages(previous_response))
        # TODO: refactor this user_content parsing out into a separate method
        user_content: Union[str, List[OpenAIChatCompletionContentPartParam]] = ""
        if isinstance(input, list):
            user_content = []
            for user_input in input:
                if isinstance(user_input.content, list):
                    for user_input_content in user_input.content:
                        if isinstance(user_input_content, OpenAIResponseInputMessageContentText):
                            user_content.append(OpenAIChatCompletionContentPartTextParam(text=user_input_content.text))
                        elif isinstance(user_input_content, OpenAIResponseInputMessageContentImage):
                            if user_input_content.image_url:
                                image_url = OpenAIImageURL(
                                    url=user_input_content.image_url, detail=user_input_content.detail
                                )
                                user_content.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
                else:
                    user_content.append(OpenAIChatCompletionContentPartTextParam(text=user_input.content))
        else:
            user_content = input
        messages.append(OpenAIUserMessageParam(content=user_content))

        chat_tools = await self._convert_response_tools_to_chat_tools(tools) if tools else None
        chat_response = await self.inference_api.openai_chat_completion(
            model=model,
            messages=messages,
            tools=chat_tools,
            stream=stream,
        )

        if stream:
            # TODO: refactor this into a separate method that handles streaming
            chat_response_id = ""
            chat_response_content = []
            # TODO: these chunk_ fields are hacky and only take the last chunk into account
            chunk_created = 0
            chunk_model = ""
            chunk_finish_reason = ""
            async for chunk in chat_response:
                chat_response_id = chunk.id
                chunk_created = chunk.created
                chunk_model = chunk.model
                for chunk_choice in chunk.choices:
                    # TODO: this only works for text content
                    chat_response_content.append(chunk_choice.delta.content or "")
                    if chunk_choice.finish_reason:
                        chunk_finish_reason = chunk_choice.finish_reason
            assistant_message = OpenAIAssistantMessageParam(content="".join(chat_response_content))
            chat_response = OpenAIChatCompletion(
                id=chat_response_id,
                choices=[
                    OpenAIChoice(
                        message=assistant_message,
                        finish_reason=chunk_finish_reason,
                        index=0,
                    )
                ],
                created=chunk_created,
                model=chunk_model,
            )
        else:
            # dump and reload to map to our pydantic types
            chat_response = OpenAIChatCompletion(**chat_response.model_dump())

        output_messages: List[OpenAIResponseOutput] = []
        if chat_response.choices[0].message.tool_calls:
            output_messages.extend(
                await self._execute_tool_and_return_final_output(model, stream, chat_response, messages)
            )
        else:
            output_messages.extend(await _openai_choices_to_output_messages(chat_response.choices))
        response = OpenAIResponseObject(
            created_at=chat_response.created,
            id=f"resp-{uuid.uuid4()}",
            model=model,
            object="response",
            status="completed",
            output=output_messages,
        )

        if store:
            # Store in kvstore
            key = f"{OPENAI_RESPONSES_PREFIX}{response.id}"
            await self.persistence_store.set(
                key=key,
                value=response.model_dump_json(),
            )

        if stream:

            async def async_response() -> AsyncIterator[OpenAIResponseObjectStream]:
                # TODO: response created should actually get emitted much earlier in the process
                yield OpenAIResponseObjectStreamResponseCreated(response=response)
                yield OpenAIResponseObjectStreamResponseCompleted(response=response)

            return async_response()

        return response

    async def _convert_response_tools_to_chat_tools(
        self, tools: List[OpenAIResponseInputTool]
    ) -> List[ChatCompletionToolParam]:
        chat_tools: List[ChatCompletionToolParam] = []
        for input_tool in tools:
            # TODO: Handle other tool types
            if input_tool.type == "web_search":
                tool_name = "web_search"
                tool = await self.tool_groups_api.get_tool(tool_name)
                tool_def = ToolDefinition(
                    tool_name=tool_name,
                    description=tool.description,
                    parameters={
                        param.name: ToolParamDefinition(
                            param_type=param.parameter_type,
                            description=param.description,
                            required=param.required,
                            default=param.default,
                        )
                        for param in tool.parameters
                    },
                )
                chat_tool = convert_tooldef_to_openai_tool(tool_def)
                chat_tools.append(chat_tool)
            else:
                raise ValueError(f"Llama Stack OpenAI Responses does not yet support tool type: {input_tool.type}")
        return chat_tools

    async def _execute_tool_and_return_final_output(
        self, model_id: str, stream: bool, chat_response: OpenAIChatCompletion, messages: List[OpenAIMessageParam]
    ) -> List[OpenAIResponseOutput]:
        output_messages: List[OpenAIResponseOutput] = []
        choice = chat_response.choices[0]

        # If the choice is not an assistant message, we don't need to execute any tools
        if not isinstance(choice.message, OpenAIAssistantMessageParam):
            return output_messages

        # If the assistant message doesn't have any tool calls, we don't need to execute any tools
        if not choice.message.tool_calls:
            return output_messages

        # Add the assistant message with tool_calls response to the messages list
        messages.append(choice.message)

        for tool_call in choice.message.tool_calls:
            tool_call_id = tool_call.id
            function = tool_call.function

            # If for some reason the tool call doesn't have a function or id, we can't execute it
            if not function or not tool_call_id:
                continue

            # TODO: telemetry spans for tool calls
            result = await self._execute_tool_call(function)

            # Handle tool call failure
            if not result:
                output_messages.append(
                    OpenAIResponseOutputMessageWebSearchToolCall(
                        id=tool_call_id,
                        status="failed",
                    )
                )
                continue

            output_messages.append(
                OpenAIResponseOutputMessageWebSearchToolCall(
                    id=tool_call_id,
                    status="completed",
                ),
            )

            result_content = ""
            # TODO: handle other result content types and lists
            if isinstance(result.content, str):
                result_content = result.content
            messages.append(OpenAIToolMessageParam(content=result_content, tool_call_id=tool_call_id))

        tool_results_chat_response = await self.inference_api.openai_chat_completion(
            model=model_id,
            messages=messages,
            stream=stream,
        )
        # type cast to appease mypy
        tool_results_chat_response = cast(OpenAIChatCompletion, tool_results_chat_response)
        tool_final_outputs = await _openai_choices_to_output_messages(tool_results_chat_response.choices)
        # TODO: Wire in annotations with URLs, titles, etc to these output messages
        output_messages.extend(tool_final_outputs)
        return output_messages

    async def _execute_tool_call(
        self,
        function: OpenAIChatCompletionToolCallFunction,
    ) -> Optional[ToolInvocationResult]:
        if not function.name:
            return None
        function_args = json.loads(function.arguments) if function.arguments else {}
        logger.info(f"executing tool call: {function.name} with args: {function_args}")
        result = await self.tool_runtime_api.invoke_tool(
            tool_name=function.name,
            kwargs=function_args,
        )
        logger.debug(f"tool call {function.name} completed with result: {result}")
        return result
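A hedged sketch of how the impl above is meant to be driven, including chaining turns through `previous_response_id`; the wiring of `impl` and the model id are assumptions:

```python
async def demo(impl) -> None:  # `impl` is an OpenAIResponsesImpl wired up by the agents provider
    first = await impl.create_openai_response(
        input="What is the capital of France?",
        model="meta-llama/Llama-3.3-70B-Instruct",  # any registered model id
    )
    # Because store=True by default, the response is persisted under
    # "openai_responses:<id>" and can seed a follow-up turn:
    follow_up = await impl.create_openai_response(
        input="And its population?",
        model="meta-llama/Llama-3.3-70B-Instruct",
        previous_response_id=first.id,
    )
    print(follow_up.output[0].content[0].text)
```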

@@ -17,10 +17,8 @@ from llama_stack.apis.common.type_system import (
    DialogType,
    StringType,
)
from llama_stack.apis.datasets import Datasets
from llama_stack.providers.utils.common.data_schema_validator import (
    ColumnName,
    validate_dataset_schema,
)

EXPECTED_DATASET_SCHEMA: dict[str, list[dict[str, Any]]] = {

@@ -36,21 +34,3 @@ EXPECTED_DATASET_SCHEMA: dict[str, list[dict[str, Any]]] = {
        }
    ],
}


async def validate_input_dataset_schema(
    datasets_api: Datasets,
    dataset_id: str,
    dataset_type: str,
) -> None:
    dataset_def = await datasets_api.get_dataset(dataset_id=dataset_id)
    if not dataset_def:
        raise ValueError(f"Dataset {dataset_id} does not exist.")

    if not dataset_def.dataset_schema or len(dataset_def.dataset_schema) == 0:
        raise ValueError(f"Dataset {dataset_id} does not have a schema defined.")

    if dataset_type not in EXPECTED_DATASET_SCHEMA:
        raise ValueError(f"Dataset type {dataset_type} is not supported.")

    validate_dataset_schema(dataset_def.dataset_schema, EXPECTED_DATASET_SCHEMA[dataset_type])

@@ -48,9 +48,6 @@ from llama_stack.apis.post_training import (
from llama_stack.distribution.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
from llama_stack.distribution.utils.model_utils import model_local_dir
from llama_stack.models.llama.sku_list import resolve_model
from llama_stack.providers.inline.post_training.common.validator import (
    validate_input_dataset_schema,
)
from llama_stack.providers.inline.post_training.torchtune.common import utils
from llama_stack.providers.inline.post_training.torchtune.common.checkpointer import (
    TorchtuneCheckpointer,

@@ -348,11 +345,9 @@ class LoraFinetuningSingleDevice:
        all_rows = await fetch_rows(dataset_id)
        rows = all_rows.data

        await validate_input_dataset_schema(
            datasets_api=self.datasets_api,
            dataset_id=dataset_id,
            dataset_type=self._data_format.value,
        )
        # TODO (xiyan): validate dataset schema
        # dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id)

        data_transform = await utils.get_data_transform(self._data_format)
        ds = SFTDataset(
            rows,

@@ -30,7 +30,7 @@ class TelemetryConfig(BaseModel):
    )
    service_name: str = Field(
        # service name is always the same, use zero-width space to avoid clutter
        default="\u200B",
        default="",
        description="The service name to use for telemetry",
    )
    sinks: List[TelemetrySink] = Field(

@@ -52,7 +52,7 @@ class TelemetryConfig(BaseModel):
    @classmethod
    def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> Dict[str, Any]:
        return {
            "service_name": "${env.OTEL_SERVICE_NAME:\u200B}",
            "service_name": "${env.OTEL_SERVICE_NAME:}",
            "sinks": "${env.TELEMETRY_SINKS:console,sqlite}",
            "sqlite_db_path": "${env.SQLITE_DB_PATH:" + __distro_dir__ + "/" + db_name + "}",
            "sqlite_db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name,
        }

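The point of moving the default inside `${env.SQLITE_STORE_DIR:...}` is that only the directory should be substitutable, with the database file name appended afterwards. A sketch of the assumed `${env.VAR:default}` semantics (the real resolver lives in llama-stack's config loader, not here):

```python
import os
import re


def substitute(template: str) -> str:
    """Replace ${env.VAR:default} with the env value when set, else the default after the colon."""
    return re.sub(
        r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*):([^}]*)\}",
        lambda m: os.environ.get(m.group(1), m.group(2)),
        template,
    )


print(substitute("${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db"))
# -> ~/.llama/distributions/ollama/trace_store.db when SQLITE_STORE_DIR is unset
```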
@@ -227,6 +227,16 @@ def available_providers() -> List[ProviderSpec]:
            provider_data_validator="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksProviderDataValidator",
        ),
    ),
    remote_provider_spec(
        api=Api.inference,
        adapter=AdapterSpec(
            adapter_type="llama-openai-compat",
            pip_packages=["litellm"],
            module="llama_stack.providers.remote.inference.llama_openai_compat",
            config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig",
            provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator",
        ),
    ),
    remote_provider_spec(
        api=Api.inference,
        adapter=AdapterSpec(

@@ -0,0 +1,17 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.apis.inference import Inference

from .config import LlamaCompatConfig


async def get_adapter_impl(config: LlamaCompatConfig, _deps) -> Inference:
    # import dynamically so the import is used only when it is needed
    from .llama import LlamaCompatInferenceAdapter

    adapter = LlamaCompatInferenceAdapter(config)
    return adapter
@@ -0,0 +1,38 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any, Dict, Optional

from pydantic import BaseModel, Field

from llama_stack.schema_utils import json_schema_type


class LlamaProviderDataValidator(BaseModel):
    llama_api_key: Optional[str] = Field(
        default=None,
        description="API key for api.llama models",
    )


@json_schema_type
class LlamaCompatConfig(BaseModel):
    api_key: Optional[str] = Field(
        default=None,
        description="The Llama API key",
    )

    openai_compat_api_base: str = Field(
        default="https://api.llama.com/compat/v1/",
        description="The URL for the Llama API server",
    )

    @classmethod
    def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> Dict[str, Any]:
        return {
            "openai_compat_api_base": "https://api.llama.com/compat/v1/",
            "api_key": api_key,
        }
@@ -0,0 +1,34 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.providers.remote.inference.llama_openai_compat.config import (
    LlamaCompatConfig,
)
from llama_stack.providers.utils.inference.litellm_openai_mixin import (
    LiteLLMOpenAIMixin,
)

from .models import MODEL_ENTRIES


class LlamaCompatInferenceAdapter(LiteLLMOpenAIMixin):
    _config: LlamaCompatConfig

    def __init__(self, config: LlamaCompatConfig):
        LiteLLMOpenAIMixin.__init__(
            self,
            model_entries=MODEL_ENTRIES,
            api_key_from_config=config.api_key,
            provider_data_api_key_field="llama_api_key",
            openai_compat_api_base=config.openai_compat_api_base,
        )
        self.config = config

    async def initialize(self):
        await super().initialize()

    async def shutdown(self):
        await super().shutdown()
@@ -0,0 +1,25 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
    build_hf_repo_model_entry,
)

MODEL_ENTRIES = [
    build_hf_repo_model_entry(
        "Llama-3.3-70B-Instruct",
        CoreModelId.llama3_3_70b_instruct.value,
    ),
    build_hf_repo_model_entry(
        "Llama-4-Scout-17B-16E-Instruct-FP8",
        CoreModelId.llama4_scout_17b_16e_instruct.value,
    ),
    build_hf_repo_model_entry(
        "Llama-4-Maverick-17B-128E-Instruct-FP8",
        CoreModelId.llama4_maverick_17b_128e_instruct.value,
    ),
]
@@ -433,6 +433,12 @@ class OllamaInferenceAdapter(
        user: Optional[str] = None,
    ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
        model_obj = await self._get_model(model)

        # ollama still makes tool calls even when tool_choice is "none"
        # so we need to remove the tools in that case
        if tool_choice == "none" and tools is not None:
            tools = None

        params = {
            k: v
            for k, v in {

@@ -90,6 +90,9 @@ class LiteLLMOpenAIMixin(
            raise ValueError(f"Unsupported model: {model.provider_resource_id}")
        return model

    def get_litellm_model_name(self, model_id: str) -> str:
        return "openai/" + model_id if self.is_openai_compat else model_id

    async def completion(
        self,
        model_id: str,

@@ -130,8 +133,7 @@ class LiteLLMOpenAIMixin(
        )

        params = await self._get_params(request)
        if self.is_openai_compat:
            params["model"] = "openai/" + params["model"]
        params["model"] = self.get_litellm_model_name(params["model"])

        logger.debug(f"params to litellm (openai compat): {params}")
        # unfortunately, we need to use synchronous litellm.completion here because litellm

@@ -220,21 +222,23 @@ class LiteLLMOpenAIMixin(
            else request.tool_config.tool_choice
        )

        return {
            "model": request.model,
            "api_key": self.get_api_key(),
            "api_base": self.api_base,
            **input_dict,
            "stream": request.stream,
            **get_sampling_options(request.sampling_params),
        }

    def get_api_key(self) -> str:
        provider_data = self.get_request_provider_data()
        key_field = self.provider_data_api_key_field
        if provider_data and getattr(provider_data, key_field, None):
            api_key = getattr(provider_data, key_field)
        else:
            api_key = self.api_key_from_config

        return {
            "model": request.model,
            "api_key": api_key,
            "api_base": self.api_base,
            **input_dict,
            "stream": request.stream,
            **get_sampling_options(request.sampling_params),
        }
        return api_key
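The refactor isolates a simple precedence rule: a per-request key supplied via provider data wins, otherwise the key from static config is used. In isolation (illustrative, not the mixin itself):

```python
from typing import Any, Optional


def resolve_api_key(provider_data: Any, key_field: str, key_from_config: Optional[str]) -> Optional[str]:
    """Per-request provider-data key takes precedence over the configured key."""
    per_request = getattr(provider_data, key_field, None) if provider_data else None
    return per_request or key_from_config
```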

    async def embeddings(
        self,

@@ -247,7 +251,7 @@ class LiteLLMOpenAIMixin(
        model = await self.model_store.get_model(model_id)

        response = litellm.embedding(
            model=model.provider_resource_id,
            model=self.get_litellm_model_name(model.provider_resource_id),
            input=[interleaved_content_as_str(content) for content in contents],
        )

@@ -278,7 +282,7 @@ class LiteLLMOpenAIMixin(
    ) -> OpenAICompletion:
        model_obj = await self.model_store.get_model(model)
        params = await prepare_openai_completion_params(
            model=model_obj.provider_resource_id,
            model=self.get_litellm_model_name(model_obj.provider_resource_id),
            prompt=prompt,
            best_of=best_of,
            echo=echo,

@@ -297,6 +301,8 @@ class LiteLLMOpenAIMixin(
            user=user,
            guided_choice=guided_choice,
            prompt_logprobs=prompt_logprobs,
            api_key=self.get_api_key(),
            api_base=self.api_base,
        )
        return await litellm.atext_completion(**params)

@@ -328,7 +334,7 @@ class LiteLLMOpenAIMixin(
    ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
        model_obj = await self.model_store.get_model(model)
        params = await prepare_openai_completion_params(
            model=model_obj.provider_resource_id,
            model=self.get_litellm_model_name(model_obj.provider_resource_id),
            messages=messages,
            frequency_penalty=frequency_penalty,
            function_call=function_call,

@@ -351,6 +357,8 @@ class LiteLLMOpenAIMixin(
            top_logprobs=top_logprobs,
            top_p=top_p,
            user=user,
            api_key=self.get_api_key(),
            api_base=self.api_base,
        )
        return await litellm.acompletion(**params)

@@ -638,10 +638,13 @@ async def convert_message_to_openai_dict_new(
            )
            for tool in message.tool_calls
        ]
        params = {}
        if tool_calls:
            params["tool_calls"] = tool_calls
        out = OpenAIChatCompletionAssistantMessage(
            role="assistant",
            content=await _convert_message_content(message.content),
            tool_calls=tool_calls or None,
            **params,
        )
    elif isinstance(message, ToolResponseMessage):
        out = OpenAIChatCompletionToolMessage(
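The point of building `params` conditionally is that `tool_calls` is only passed when non-empty, instead of always passing an explicit `None`; presumably some OpenAI-compatible validators treat an omitted key and an explicit null differently. The pattern in isolation:

```python
def build_kwargs(tool_calls: list) -> dict:
    params = {}
    if tool_calls:
        params["tool_calls"] = tool_calls  # key omitted entirely when there are none
    return params


assert build_kwargs([]) == {}
assert build_kwargs([{"id": "call_1"}]) == {"tool_calls": [{"id": "call_1"}]}
```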

@@ -478,6 +478,8 @@ class JsonSchemaGenerator:
            }
            return ret
        elif origin_type is Literal:
            if len(typing.get_args(typ)) != 1:
                raise ValueError(f"Literal type {typ} has {len(typing.get_args(typ))} arguments")
            (literal_value,) = typing.get_args(typ)  # unpack value of literal type
            schema = self.type_to_schema(type(literal_value))
            schema["const"] = literal_value
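What the new branch emits, shown standalone: a single-valued `Literal` becomes the schema of its base type plus a `const` constraint.

```python
import typing
from typing import Literal

typ = Literal["json"]
(literal_value,) = typing.get_args(typ)  # -> "json"
schema = {"type": "string", "const": literal_value}  # mirrors the generator branch above
assert schema == {"type": "string", "const": "json"}
```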

@@ -39,9 +39,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/bedrock/trace_store.db}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

@@ -79,9 +79,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/cerebras/trace_store.db}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/trace_store.db
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search

@@ -42,9 +42,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ci-tests/trace_store.db}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

@@ -45,9 +45,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dell/trace_store.db}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

@@ -41,9 +41,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dell/trace_store.db}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

@@ -344,6 +344,45 @@
      "sentence-transformers --no-deps",
      "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
    ],
    "llama_api": [
      "aiosqlite",
      "autoevals",
      "blobfile",
      "chardet",
      "chromadb-client",
      "datasets",
      "emoji",
      "fastapi",
      "fire",
      "httpx",
      "langdetect",
      "litellm",
      "matplotlib",
      "mcp",
      "nltk",
      "numpy",
      "openai",
      "opentelemetry-exporter-otlp-proto-http",
      "opentelemetry-sdk",
      "pandas",
      "pillow",
      "psycopg2-binary",
      "pymongo",
      "pypdf",
      "pythainlp",
      "redis",
      "requests",
      "scikit-learn",
      "scipy",
      "sentencepiece",
      "sqlite-vec",
      "tqdm",
      "transformers",
      "tree_sitter",
      "uvicorn",
      "sentence-transformers --no-deps",
      "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
    ],
    "meta-reference-gpu": [
      "accelerate",
      "aiosqlite",

@@ -71,9 +71,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dev/trace_store.db}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

@@ -50,9 +50,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/fireworks/trace_store.db}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

@@ -45,9 +45,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/fireworks/trace_store.db}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

@@ -45,9 +45,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/groq/trace_store.db}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

@@ -50,9 +50,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-endpoint/trace_store.db}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

@@ -45,9 +45,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-endpoint/trace_store.db}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

@@ -50,9 +50,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-serverless/trace_store.db}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

@@ -45,9 +45,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-serverless/trace_store.db}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

7
llama_stack/templates/llama_api/__init__.py
Normal file

@@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .llama_api import get_distribution_template  # noqa: F401

33
llama_stack/templates/llama_api/build.yaml
Normal file

@@ -0,0 +1,33 @@
version: '2'
distribution_spec:
  description: Distribution for running e2e tests in CI
  providers:
    inference:
    - remote::llama-openai-compat
    - inline::sentence-transformers
    vector_io:
    - inline::sqlite-vec
    - remote::chromadb
    - remote::pgvector
    safety:
    - inline::llama-guard
    agents:
    - inline::meta-reference
    telemetry:
    - inline::meta-reference
    eval:
    - inline::meta-reference
    datasetio:
    - remote::huggingface
    - inline::localfs
    scoring:
    - inline::basic
    - inline::llm-as-judge
    - inline::braintrust
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
image_type: conda

159
llama_stack/templates/llama_api/llama_api.py
Normal file
159
llama_stack/templates/llama_api/llama_api.py
Normal file
|
@ -0,0 +1,159 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import List, Tuple

from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import (
    ModelInput,
    Provider,
    ShieldInput,
    ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
    SentenceTransformersInferenceConfig,
)
from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
    SQLiteVectorIOConfig,
)
from llama_stack.providers.remote.inference.llama_openai_compat.config import (
    LlamaCompatConfig,
)
from llama_stack.providers.remote.inference.llama_openai_compat.models import (
    MODEL_ENTRIES as LLLAMA_MODEL_ENTRIES,
)
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
from llama_stack.providers.remote.vector_io.pgvector.config import (
    PGVectorVectorIOConfig,
)
from llama_stack.templates.template import (
    DistributionTemplate,
    RunConfigSettings,
    get_model_registry,
)


def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
    # in this template, we allow each API key to be optional
    providers = [
        (
            "llama-openai-compat",
            LLLAMA_MODEL_ENTRIES,
            LlamaCompatConfig.sample_run_config(api_key="${env.LLAMA_API_KEY:}"),
        ),
    ]
    inference_providers = []
    available_models = {}
    for provider_id, model_entries, config in providers:
        inference_providers.append(
            Provider(
                provider_id=provider_id,
                provider_type=f"remote::{provider_id}",
                config=config,
            )
        )
        available_models[provider_id] = model_entries
    return inference_providers, available_models


def get_distribution_template() -> DistributionTemplate:
    inference_providers, available_models = get_inference_providers()
    providers = {
        "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
        "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
        "safety": ["inline::llama-guard"],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
        "eval": ["inline::meta-reference"],
        "datasetio": ["remote::huggingface", "inline::localfs"],
        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
    }
    name = "llama_api"

    vector_io_providers = [
        Provider(
            provider_id="sqlite-vec",
            provider_type="inline::sqlite-vec",
            config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
        ),
        Provider(
            provider_id="${env.ENABLE_CHROMADB+chromadb}",
            provider_type="remote::chromadb",
            config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"),
        ),
        Provider(
            provider_id="${env.ENABLE_PGVECTOR+pgvector}",
            provider_type="remote::pgvector",
            config=PGVectorVectorIOConfig.sample_run_config(
                db="${env.PGVECTOR_DB:}",
                user="${env.PGVECTOR_USER:}",
                password="${env.PGVECTOR_PASSWORD:}",
            ),
        ),
    ]
    embedding_provider = Provider(
        provider_id="sentence-transformers",
        provider_type="inline::sentence-transformers",
        config=SentenceTransformersInferenceConfig.sample_run_config(),
    )

    default_tool_groups = [
        ToolGroupInput(
            toolgroup_id="builtin::websearch",
            provider_id="tavily-search",
        ),
        ToolGroupInput(
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
        ToolGroupInput(
            toolgroup_id="builtin::code_interpreter",
            provider_id="code-interpreter",
        ),
    ]
    embedding_model = ModelInput(
        model_id="all-MiniLM-L6-v2",
        provider_id=embedding_provider.provider_id,
        model_type=ModelType.embedding,
        metadata={
            "embedding_dimension": 384,
        },
    )

    default_models = get_model_registry(available_models)
    return DistributionTemplate(
        name=name,
        distro_type="self_hosted",
        description="Distribution for running e2e tests in CI",
        container_image=None,
        template_path=None,
        providers=providers,
        available_models_by_provider=available_models,
        run_configs={
            "run.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": inference_providers + [embedding_provider],
                    "vector_io": vector_io_providers,
                },
                default_models=default_models + [embedding_model],
                default_tool_groups=default_tool_groups,
                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
            ),
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
                "8321",
                "Port for the Llama Stack distribution server",
            ),
        },
    )
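A quick, assumed smoke check for the new template entry point (names are taken from the file above; running it assumes a llama-stack development install):

# Sketch: exercise the new template module; illustrative only.
from llama_stack.templates.llama_api import get_distribution_template

template = get_distribution_template()
assert template.name == "llama_api"
print(sorted(template.providers))  # the APIs wired into this distribution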
167
llama_stack/templates/llama_api/run.yaml
Normal file
@@ -0,0 +1,167 @@
version: '2'
image_name: llama_api
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
  inference:
  - provider_id: llama-openai-compat
    provider_type: remote::llama-openai-compat
    config:
      openai_compat_api_base: https://api.llama.com/compat/v1/
      api_key: ${env.LLAMA_API_KEY:}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
  vector_io:
  - provider_id: sqlite-vec
    provider_type: inline::sqlite-vec
    config:
      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/sqlite_vec.db
  - provider_id: ${env.ENABLE_CHROMADB+chromadb}
    provider_type: remote::chromadb
    config:
      url: ${env.CHROMADB_URL:}
  - provider_id: ${env.ENABLE_PGVECTOR+pgvector}
    provider_type: remote::pgvector
    config:
      host: ${env.PGVECTOR_HOST:localhost}
      port: ${env.PGVECTOR_PORT:5432}
      db: ${env.PGVECTOR_DB:}
      user: ${env.PGVECTOR_USER:}
      password: ${env.PGVECTOR_PASSWORD:}
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config:
      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
    config: {}
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
    config: {}
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/registry.db
models:
- metadata: {}
  model_id: Llama-3.3-70B-Instruct
  provider_id: llama-openai-compat
  provider_model_id: Llama-3.3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.3-70B-Instruct
  provider_id: llama-openai-compat
  provider_model_id: Llama-3.3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: Llama-4-Scout-17B-16E-Instruct-FP8
  provider_id: llama-openai-compat
  provider_model_id: Llama-4-Scout-17B-16E-Instruct-FP8
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
  provider_id: llama-openai-compat
  provider_model_id: Llama-4-Scout-17B-16E-Instruct-FP8
  model_type: llm
- metadata: {}
  model_id: Llama-4-Maverick-17B-128E-Instruct-FP8
  provider_id: llama-openai-compat
  provider_model_id: Llama-4-Maverick-17B-128E-Instruct-FP8
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
  provider_id: llama-openai-compat
  provider_model_id: Llama-4-Maverick-17B-128E-Instruct-FP8
  model_type: llm
- metadata:
    embedding_dimension: 384
  model_id: all-MiniLM-L6-v2
  provider_id: sentence-transformers
  model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
  provider_id: code-interpreter
server:
  port: 8321
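Since the generated run.yaml above is plain YAML, a small assumed sanity check (uses only PyYAML; the path is relative to the repo root):

# Sketch: load the generated run.yaml and check a few invariants.
import yaml

with open("llama_stack/templates/llama_api/run.yaml") as f:
    cfg = yaml.safe_load(f)

assert cfg["image_name"] == "llama_api"
assert set(cfg["apis"]) >= {"agents", "inference", "safety", "telemetry"}
print(f"{len(cfg['models'])} models, server port {cfg['server']['port']}")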
@@ -60,9 +60,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/meta-reference-gpu/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference

@@ -50,9 +50,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/meta-reference-gpu/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference

@@ -50,9 +50,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/nvidia/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db
   eval:
   - provider_id: nvidia
     provider_type: remote::nvidia

@@ -45,9 +45,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/nvidia/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db
   eval:
   - provider_id: nvidia
     provider_type: remote::nvidia

@@ -43,9 +43,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ollama/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference

@@ -41,9 +41,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ollama/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference

@@ -68,9 +68,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/open-benchmark/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference

@@ -50,9 +50,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/passthrough/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference

@@ -45,9 +45,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/passthrough/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference

@@ -88,9 +88,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search

@@ -81,9 +81,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search

@@ -51,9 +51,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/trace_store.db
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search

@@ -45,9 +45,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/tgi/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference

@@ -44,9 +44,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/tgi/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference

@@ -50,9 +50,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/together/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference

@@ -45,9 +45,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/together/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference

@@ -78,9 +78,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/verification/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference

@@ -49,9 +49,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/vllm-gpu/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference

@@ -43,9 +43,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/watsonx/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "llama_stack"
-version = "0.2.1"
+version = "0.2.4"
 authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
 description = "Llama Stack"
 readme = "README.md"

@@ -27,7 +27,7 @@ dependencies = [
     "huggingface-hub",
     "jinja2>=3.1.6",
     "jsonschema",
-    "llama-stack-client>=0.2.2",
+    "llama-stack-client>=0.2.4",
     "openai>=1.66",
     "prompt-toolkit",
     "python-dotenv",

@@ -39,6 +39,7 @@ dependencies = [
     "tiktoken",
     "pillow",
     "h11>=0.16.0",
+    "kubernetes",
 ]

 [project.optional-dependencies]

@@ -67,7 +68,7 @@ unit = [
     "pypdf",
     "chardet",
     "qdrant-client",
-    "opentelemetry-exporter-otlp-proto-http"
+    "opentelemetry-exporter-otlp-proto-http",
 ]
 # These are the core dependencies required for running integration tests. They are shared across all
 # providers. If a provider requires additional dependencies, please add them to your environment

@@ -104,7 +105,7 @@ codegen = ["rich", "pydantic", "jinja2>=3.1.6"]
 ui = [
     "streamlit",
     "pandas",
-    "llama-stack-client>=0.2.1",
+    "llama-stack-client>=0.2.4",
     "streamlit-option-menu",
 ]

@@ -320,6 +321,7 @@ exclude = [
     "^llama_stack/strong_typing/serializer\\.py$",
     "^llama_stack/templates/dev/dev\\.py$",
     "^llama_stack/templates/groq/groq\\.py$",
+    "^llama_stack/templates/llama_api/llama_api\\.py$",
     "^llama_stack/templates/sambanova/sambanova\\.py$",
     "^llama_stack/templates/template\\.py$",
 ]
@@ -4,15 +4,18 @@ annotated-types==0.7.0
 anyio==4.8.0
 attrs==25.1.0
 blobfile==3.0.0
+cachetools==5.5.2
 certifi==2025.1.31
 charset-normalizer==3.4.1
 click==8.1.8
 colorama==0.4.6 ; sys_platform == 'win32'
 distro==1.9.0
+durationpy==0.9
 exceptiongroup==1.2.2 ; python_full_version < '3.11'
 filelock==3.17.0
 fire==0.7.0
 fsspec==2024.12.0
+google-auth==2.38.0
 h11==0.16.0
 httpcore==1.0.9
 httpx==0.28.1

@@ -22,18 +25,22 @@ jinja2==3.1.6
 jiter==0.8.2
 jsonschema==4.23.0
 jsonschema-specifications==2024.10.1
-llama-stack-client==0.2.2
+kubernetes==32.0.1
+llama-stack-client==0.2.4
 lxml==5.3.1
 markdown-it-py==3.0.0
 markupsafe==3.0.2
 mdurl==0.1.2
 numpy==2.2.3
+oauthlib==3.2.2
 openai==1.71.0
 packaging==24.2
 pandas==2.2.3
 pillow==11.1.0
 prompt-toolkit==3.0.50
 pyaml==25.1.0
+pyasn1==0.6.1
+pyasn1-modules==0.4.2
 pycryptodomex==3.21.0
 pydantic==2.10.6
 pydantic-core==2.27.2

@@ -45,8 +52,10 @@ pyyaml==6.0.2
 referencing==0.36.2
 regex==2024.11.6
 requests==2.32.3
+requests-oauthlib==2.0.0
 rich==13.9.4
 rpds-py==0.22.3
+rsa==4.9
 setuptools==75.8.0
 six==1.17.0
 sniffio==1.3.1

@@ -57,3 +66,4 @@ typing-extensions==4.12.2
 tzdata==2025.1
 urllib3==2.3.0
 wcwidth==0.2.13
+websocket-client==1.8.0
@@ -24,9 +24,9 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ollama/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db
   datasetio:
   - provider_id: huggingface
     provider_type: remote::huggingface
@@ -14,6 +14,7 @@ from pathlib import Path

 import pytest
+import yaml
 from llama_stack_client import LlamaStackClient
 from openai import OpenAI

 from llama_stack import LlamaStackAsLibraryClient
 from llama_stack.apis.datatypes import Api

@@ -207,3 +208,9 @@ def llama_stack_client(request, provider_data, text_model_id):
         raise RuntimeError("Initialization failed")

     return client


+@pytest.fixture(scope="session")
+def openai_client(client_with_models):
+    base_url = f"{client_with_models.base_url}/v1/openai/v1"
+    return OpenAI(base_url=base_url, api_key="fake")
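For illustration, a test consuming the new session-scoped fixture above might look like this (the test body is an assumed sketch, not part of this commit; only the standard OpenAI client API is used):

# Sketch: exercise the openai_client fixture against the stack's OpenAI-compat endpoint.
def test_openai_models_list(openai_client):
    models = openai_client.models.list()
    assert any(m.id for m in models)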
37
tests/integration/test_cases/openai/responses.json
Normal file
@@ -0,0 +1,37 @@
{
  "non_streaming_01": {
    "data": {
      "question": "Which planet do humans live on?",
      "expected": "Earth"
    }
  },
  "non_streaming_02": {
    "data": {
      "question": "Which planet has rings around it with a name starting with letter S?",
      "expected": "Saturn"
    }
  },
  "streaming_01": {
    "data": {
      "question": "What's the name of the Sun in Latin?",
      "expected": "Sol"
    }
  },
  "streaming_02": {
    "data": {
      "question": "What is the name of the US capital?",
      "expected": "Washington"
    }
  },
  "tools_web_search_01": {
    "data": {
      "input": "How many experts does the Llama 4 Maverick model have?",
      "tools": [
        {
          "type": "web_search"
        }
      ],
      "expected": "128"
    }
  }
}
@@ -12,6 +12,7 @@ class TestCase:
     _apis = [
         "inference/chat_completion",
         "inference/completion",
+        "openai/responses",
     ]
     _jsonblob = {}

314
tests/unit/distribution/routers/test_routing_tables.py
Normal file
@@ -0,0 +1,314 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

# Unit tests for the routing tables

from unittest.mock import AsyncMock

import pytest

from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource
from llama_stack.apis.datatypes import Api
from llama_stack.apis.models.models import Model, ModelType
from llama_stack.apis.shields.shields import Shield
from llama_stack.apis.tools import ListToolDefsResponse, ToolDef, ToolParameter
from llama_stack.apis.vector_dbs.vector_dbs import VectorDB
from llama_stack.distribution.routers.routing_tables import (
    BenchmarksRoutingTable,
    DatasetsRoutingTable,
    ModelsRoutingTable,
    ScoringFunctionsRoutingTable,
    ShieldsRoutingTable,
    ToolGroupsRoutingTable,
    VectorDBsRoutingTable,
)
from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore.sqlite import SqliteKVStoreImpl


@pytest.fixture
async def dist_registry(tmp_path):
    db_path = tmp_path / "test_kv.db"
    kvstore_config = SqliteKVStoreConfig(db_path=db_path.as_posix())
    kvstore = SqliteKVStoreImpl(kvstore_config)
    await kvstore.initialize()
    registry = CachedDiskDistributionRegistry(kvstore)
    await registry.initialize()
    yield registry


class Impl:
    def __init__(self, api: Api):
        self.api = api

    @property
    def __provider_spec__(self):
        _provider_spec = AsyncMock()
        _provider_spec.api = self.api
        return _provider_spec


class InferenceImpl(Impl):
    def __init__(self):
        super().__init__(Api.inference)

    async def register_model(self, model: Model):
        return model

    async def unregister_model(self, model_id: str):
        return model_id


class SafetyImpl(Impl):
    def __init__(self):
        super().__init__(Api.safety)

    async def register_shield(self, shield: Shield):
        return shield


class VectorDBImpl(Impl):
    def __init__(self):
        super().__init__(Api.vector_io)

    async def register_vector_db(self, vector_db: VectorDB):
        return vector_db

    async def unregister_vector_db(self, vector_db_id: str):
        return vector_db_id


class DatasetsImpl(Impl):
    def __init__(self):
        super().__init__(Api.datasetio)

    async def register_dataset(self, dataset: Dataset):
        return dataset

    async def unregister_dataset(self, dataset_id: str):
        return dataset_id


class ScoringFunctionsImpl(Impl):
    def __init__(self):
        super().__init__(Api.scoring)

    async def list_scoring_functions(self):
        return []

    async def register_scoring_function(self, scoring_fn):
        return scoring_fn


class BenchmarksImpl(Impl):
    def __init__(self):
        super().__init__(Api.eval)

    async def register_benchmark(self, benchmark):
        return benchmark


class ToolGroupsImpl(Impl):
    def __init__(self):
        super().__init__(Api.tool_runtime)

    async def register_tool(self, tool):
        return tool

    async def unregister_tool(self, tool_name: str):
        return tool_name

    async def list_runtime_tools(self, toolgroup_id, mcp_endpoint):
        return ListToolDefsResponse(
            data=[
                ToolDef(
                    name="test-tool",
                    description="Test tool",
                    parameters=[ToolParameter(name="test-param", description="Test param", parameter_type="string")],
                )
            ]
        )


@pytest.mark.asyncio
async def test_models_routing_table(dist_registry):
    table = ModelsRoutingTable({"test_provider": InferenceImpl()}, dist_registry)
    await table.initialize()

    # Register multiple models and verify listing
    await table.register_model(model_id="test-model", provider_id="test_provider")
    await table.register_model(model_id="test-model-2", provider_id="test_provider")

    models = await table.list_models()
    assert len(models.data) == 2
    model_ids = {m.identifier for m in models.data}
    assert "test-model" in model_ids
    assert "test-model-2" in model_ids

    # Test openai list models
    openai_models = await table.openai_list_models()
    assert len(openai_models.data) == 2
    openai_model_ids = {m.id for m in openai_models.data}
    assert "test-model" in openai_model_ids
    assert "test-model-2" in openai_model_ids

    # Test get_object_by_identifier
    model = await table.get_object_by_identifier("model", "test-model")
    assert model is not None
    assert model.identifier == "test-model"

    # Test get_object_by_identifier on non-existent object
    non_existent = await table.get_object_by_identifier("model", "non-existent-model")
    assert non_existent is None

    await table.unregister_model(model_id="test-model")
    await table.unregister_model(model_id="test-model-2")

    models = await table.list_models()
    assert len(models.data) == 0

    # Test openai list models
    openai_models = await table.openai_list_models()
    assert len(openai_models.data) == 0


@pytest.mark.asyncio
async def test_shields_routing_table(dist_registry):
    table = ShieldsRoutingTable({"test_provider": SafetyImpl()}, dist_registry)
    await table.initialize()

    # Register multiple shields and verify listing
    await table.register_shield(shield_id="test-shield", provider_id="test_provider")
    await table.register_shield(shield_id="test-shield-2", provider_id="test_provider")
    shields = await table.list_shields()

    assert len(shields.data) == 2
    shield_ids = {s.identifier for s in shields.data}
    assert "test-shield" in shield_ids
    assert "test-shield-2" in shield_ids


@pytest.mark.asyncio
async def test_vectordbs_routing_table(dist_registry):
    table = VectorDBsRoutingTable({"test_provider": VectorDBImpl()}, dist_registry)
    await table.initialize()

    m_table = ModelsRoutingTable({"test_providere": InferenceImpl()}, dist_registry)
    await m_table.initialize()
    await m_table.register_model(
        model_id="test-model",
        provider_id="test_providere",
        metadata={"embedding_dimension": 128},
        model_type=ModelType.embedding,
    )

    # Register multiple vector databases and verify listing
    await table.register_vector_db(vector_db_id="test-vectordb", embedding_model="test-model")
    await table.register_vector_db(vector_db_id="test-vectordb-2", embedding_model="test-model")
    vector_dbs = await table.list_vector_dbs()

    assert len(vector_dbs.data) == 2
    vector_db_ids = {v.identifier for v in vector_dbs.data}
    assert "test-vectordb" in vector_db_ids
    assert "test-vectordb-2" in vector_db_ids

    await table.unregister_vector_db(vector_db_id="test-vectordb")
    await table.unregister_vector_db(vector_db_id="test-vectordb-2")

    vector_dbs = await table.list_vector_dbs()
    assert len(vector_dbs.data) == 0


async def test_datasets_routing_table(dist_registry):
    table = DatasetsRoutingTable({"localfs": DatasetsImpl()}, dist_registry)
    await table.initialize()

    # Register multiple datasets and verify listing
    await table.register_dataset(
        dataset_id="test-dataset", purpose=DatasetPurpose.eval_messages_answer, source=URIDataSource(uri="test-uri")
    )
    await table.register_dataset(
        dataset_id="test-dataset-2", purpose=DatasetPurpose.eval_messages_answer, source=URIDataSource(uri="test-uri-2")
    )
    datasets = await table.list_datasets()

    assert len(datasets.data) == 2
    dataset_ids = {d.identifier for d in datasets.data}
    assert "test-dataset" in dataset_ids
    assert "test-dataset-2" in dataset_ids

    await table.unregister_dataset(dataset_id="test-dataset")
    await table.unregister_dataset(dataset_id="test-dataset-2")

    datasets = await table.list_datasets()
    assert len(datasets.data) == 0


@pytest.mark.asyncio
async def test_scoring_functions_routing_table(dist_registry):
    table = ScoringFunctionsRoutingTable({"test_provider": ScoringFunctionsImpl()}, dist_registry)
    await table.initialize()

    # Register multiple scoring functions and verify listing
    await table.register_scoring_function(
        scoring_fn_id="test-scoring-fn",
        provider_id="test_provider",
        description="Test scoring function",
        return_type=NumberType(),
    )
    await table.register_scoring_function(
        scoring_fn_id="test-scoring-fn-2",
        provider_id="test_provider",
        description="Another test scoring function",
        return_type=NumberType(),
    )
    scoring_functions = await table.list_scoring_functions()

    assert len(scoring_functions.data) == 2
    scoring_fn_ids = {fn.identifier for fn in scoring_functions.data}
    assert "test-scoring-fn" in scoring_fn_ids
    assert "test-scoring-fn-2" in scoring_fn_ids


@pytest.mark.asyncio
async def test_benchmarks_routing_table(dist_registry):
    table = BenchmarksRoutingTable({"test_provider": BenchmarksImpl()}, dist_registry)
    await table.initialize()

    # Register multiple benchmarks and verify listing
    await table.register_benchmark(
        benchmark_id="test-benchmark",
        dataset_id="test-dataset",
        scoring_functions=["test-scoring-fn", "test-scoring-fn-2"],
    )
    benchmarks = await table.list_benchmarks()

    assert len(benchmarks.data) == 1
    benchmark_ids = {b.identifier for b in benchmarks.data}
    assert "test-benchmark" in benchmark_ids


@pytest.mark.asyncio
async def test_tool_groups_routing_table(dist_registry):
    table = ToolGroupsRoutingTable({"test_provider": ToolGroupsImpl()}, dist_registry)
    await table.initialize()

    # Register multiple tool groups and verify listing
    await table.register_tool_group(
        toolgroup_id="test-toolgroup",
        provider_id="test_provider",
    )
    tool_groups = await table.list_tool_groups()

    assert len(tool_groups.data) == 1
    tool_group_ids = {tg.identifier for tg in tool_groups.data}
    assert "test-toolgroup" in tool_group_ids

    await table.unregister_toolgroup(toolgroup_id="test-toolgroup")
    tool_groups = await table.list_tool_groups()
    assert len(tool_groups.data) == 0
@@ -12,7 +12,7 @@ import pytest

 from llama_stack.apis.models import ModelType
 from llama_stack.distribution.datatypes import ModelWithACL
-from llama_stack.distribution.server.auth import AccessAttributes
+from llama_stack.distribution.server.auth_providers import AccessAttributes
 from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry
 from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
 from llama_stack.providers.utils.kvstore.sqlite import SqliteKVStoreImpl
@@ -10,7 +10,9 @@ import pytest
 from fastapi import FastAPI
 from fastapi.testclient import TestClient

+from llama_stack.distribution.datatypes import AccessAttributes
 from llama_stack.distribution.server.auth import AuthenticationMiddleware
+from llama_stack.distribution.server.auth_providers import AuthProviderConfig, AuthProviderType


 class MockResponse:

@@ -38,9 +40,23 @@ def invalid_api_key():


 @pytest.fixture
-def app(mock_auth_endpoint):
+def valid_token():
+    return "valid.jwt.token"
+
+
+@pytest.fixture
+def invalid_token():
+    return "invalid.jwt.token"
+
+
+@pytest.fixture
+def http_app(mock_auth_endpoint):
     app = FastAPI()
-    app.add_middleware(AuthenticationMiddleware, auth_endpoint=mock_auth_endpoint)
+    auth_config = AuthProviderConfig(
+        provider_type=AuthProviderType.CUSTOM,
+        config={"endpoint": mock_auth_endpoint},
+    )
+    app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)

     @app.get("/test")
     def test_endpoint():

@@ -50,8 +66,29 @@ def app(mock_auth_endpoint):


 @pytest.fixture
-def client(app):
-    return TestClient(app)
+def k8s_app():
+    app = FastAPI()
+    auth_config = AuthProviderConfig(
+        provider_type=AuthProviderType.KUBERNETES,
+        config={"api_server_url": "https://kubernetes.default.svc"},
+    )
+    app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)
+
+    @app.get("/test")
+    def test_endpoint():
+        return {"message": "Authentication successful"}
+
+    return app
+
+
+@pytest.fixture
+def http_client(http_app):
+    return TestClient(http_app)
+
+
+@pytest.fixture
+def k8s_client(k8s_app):
+    return TestClient(k8s_app)


 @pytest.fixture

@@ -61,7 +98,7 @@ def mock_scope():
         "path": "/models/list",
         "headers": [
             (b"content-type", b"application/json"),
-            (b"authorization", b"Bearer test-api-key"),
+            (b"authorization", b"Bearer test.jwt.token"),
             (b"user-agent", b"test-user-agent"),
         ],
         "query_string": b"limit=100&offset=0",

@@ -69,13 +106,38 @@


 @pytest.fixture
-def mock_middleware(mock_auth_endpoint):
+def mock_http_middleware(mock_auth_endpoint):
     mock_app = AsyncMock()
-    return AuthenticationMiddleware(mock_app, mock_auth_endpoint), mock_app
+    auth_config = AuthProviderConfig(
+        provider_type=AuthProviderType.CUSTOM,
+        config={"endpoint": mock_auth_endpoint},
+    )
+    return AuthenticationMiddleware(mock_app, auth_config), mock_app
+
+
+@pytest.fixture
+def mock_k8s_middleware():
+    mock_app = AsyncMock()
+    auth_config = AuthProviderConfig(
+        provider_type=AuthProviderType.KUBERNETES,
+        config={"api_server_url": "https://kubernetes.default.svc"},
+    )
+    return AuthenticationMiddleware(mock_app, auth_config), mock_app


 async def mock_post_success(*args, **kwargs):
-    return MockResponse(200, {"message": "Authentication successful"})
+    return MockResponse(
+        200,
+        {
+            "message": "Authentication successful",
+            "access_attributes": {
+                "roles": ["admin", "user"],
+                "teams": ["ml-team", "nlp-team"],
+                "projects": ["llama-3", "project-x"],
+                "namespaces": ["research", "production"],
+            },
+        },
+    )


 async def mock_post_failure(*args, **kwargs):

@@ -86,45 +148,46 @@ async def mock_post_exception(*args, **kwargs):
     raise Exception("Connection error")


-def test_missing_auth_header(client):
-    response = client.get("/test")
+# HTTP Endpoint Tests
+def test_missing_auth_header(http_client):
+    response = http_client.get("/test")
     assert response.status_code == 401
     assert "Missing or invalid Authorization header" in response.json()["error"]["message"]


-def test_invalid_auth_header_format(client):
-    response = client.get("/test", headers={"Authorization": "InvalidFormat token123"})
+def test_invalid_auth_header_format(http_client):
+    response = http_client.get("/test", headers={"Authorization": "InvalidFormat token123"})
     assert response.status_code == 401
     assert "Missing or invalid Authorization header" in response.json()["error"]["message"]


 @patch("httpx.AsyncClient.post", new=mock_post_success)
-def test_valid_authentication(client, valid_api_key):
-    response = client.get("/test", headers={"Authorization": f"Bearer {valid_api_key}"})
+def test_valid_http_authentication(http_client, valid_api_key):
+    response = http_client.get("/test", headers={"Authorization": f"Bearer {valid_api_key}"})
     assert response.status_code == 200
     assert response.json() == {"message": "Authentication successful"}


 @patch("httpx.AsyncClient.post", new=mock_post_failure)
-def test_invalid_authentication(client, invalid_api_key):
-    response = client.get("/test", headers={"Authorization": f"Bearer {invalid_api_key}"})
+def test_invalid_http_authentication(http_client, invalid_api_key):
+    response = http_client.get("/test", headers={"Authorization": f"Bearer {invalid_api_key}"})
     assert response.status_code == 401
     assert "Authentication failed" in response.json()["error"]["message"]


 @patch("httpx.AsyncClient.post", new=mock_post_exception)
-def test_auth_service_error(client, valid_api_key):
-    response = client.get("/test", headers={"Authorization": f"Bearer {valid_api_key}"})
+def test_http_auth_service_error(http_client, valid_api_key):
+    response = http_client.get("/test", headers={"Authorization": f"Bearer {valid_api_key}"})
     assert response.status_code == 401
     assert "Authentication service error" in response.json()["error"]["message"]


-def test_auth_request_payload(client, valid_api_key, mock_auth_endpoint):
+def test_http_auth_request_payload(http_client, valid_api_key, mock_auth_endpoint):
     with patch("httpx.AsyncClient.post") as mock_post:
         mock_response = MockResponse(200, {"message": "Authentication successful"})
         mock_post.return_value = mock_response

-        client.get(
+        http_client.get(
             "/test?param1=value1&param2=value2",
             headers={
                 "Authorization": f"Bearer {valid_api_key}",

@@ -149,40 +212,43 @@ def test_auth_request_payload(client, valid_api_key, mock_auth_endpoint):


 @pytest.mark.asyncio
-async def test_auth_middleware_with_access_attributes(mock_middleware, mock_scope):
-    middleware, mock_app = mock_middleware
+async def test_http_middleware_with_access_attributes(mock_http_middleware, mock_scope):
+    """Test HTTP middleware behavior with access attributes"""
+    middleware, mock_app = mock_http_middleware
     mock_receive = AsyncMock()
     mock_send = AsyncMock()

-    with patch("httpx.AsyncClient") as mock_client:
-        mock_client_instance = AsyncMock()
-        mock_client.return_value.__aenter__.return_value = mock_client_instance
-
-        mock_client_instance.post.return_value = MockResponse(
+    with patch("httpx.AsyncClient.post") as mock_post:
+        mock_response = MockResponse(
             200,
             {
                 "message": "Authentication successful",
                 "access_attributes": {
                     "roles": ["admin", "user"],
-                    "teams": ["ml-team"],
-                    "projects": ["project-x", "project-y"],
-                }
+                    "teams": ["ml-team", "nlp-team"],
+                    "projects": ["llama-3", "project-x"],
+                    "namespaces": ["research", "production"],
+                },
             },
         )
+        mock_post.return_value = mock_response

         await middleware(mock_scope, mock_receive, mock_send)

         assert "user_attributes" in mock_scope
-        assert mock_scope["user_attributes"]["roles"] == ["admin", "user"]
-        assert mock_scope["user_attributes"]["teams"] == ["ml-team"]
-        assert mock_scope["user_attributes"]["projects"] == ["project-x", "project-y"]
+        attributes = mock_scope["user_attributes"]
+        assert attributes["roles"] == ["admin", "user"]
+        assert attributes["teams"] == ["ml-team", "nlp-team"]
+        assert attributes["projects"] == ["llama-3", "project-x"]
+        assert attributes["namespaces"] == ["research", "production"]

         mock_app.assert_called_once_with(mock_scope, mock_receive, mock_send)


 @pytest.mark.asyncio
-async def test_auth_middleware_no_attributes(mock_middleware, mock_scope):
+async def test_http_middleware_no_attributes(mock_http_middleware, mock_scope):
     """Test middleware behavior with no access attributes"""
-    middleware, mock_app = mock_middleware
+    middleware, mock_app = mock_http_middleware
     mock_receive = AsyncMock()
     mock_send = AsyncMock()

@@ -203,4 +269,104 @@
     assert "user_attributes" in mock_scope
     attributes = mock_scope["user_attributes"]
     assert "namespaces" in attributes
-    assert attributes["namespaces"] == ["test-api-key"]
+    assert attributes["namespaces"] == ["test.jwt.token"]
+
+
+# Kubernetes Tests
+def test_missing_auth_header_k8s(k8s_client):
+    response = k8s_client.get("/test")
+    assert response.status_code == 401
+    assert "Missing or invalid Authorization header" in response.json()["error"]["message"]
+
+
+def test_invalid_auth_header_format_k8s(k8s_client):
+    response = k8s_client.get("/test", headers={"Authorization": "InvalidFormat token123"})
+    assert response.status_code == 401
+    assert "Missing or invalid Authorization header" in response.json()["error"]["message"]
+
+
+@patch("kubernetes.client.ApiClient")
+def test_valid_k8s_authentication(mock_api_client, k8s_client, valid_token):
+    # Mock the Kubernetes client
+    mock_client = AsyncMock()
+    mock_api_client.return_value = mock_client
+
+    # Mock successful token validation
+    mock_client.set_default_header = AsyncMock()
+
+    # Mock the token validation to return valid access attributes
+    with patch("llama_stack.distribution.server.auth_providers.KubernetesAuthProvider.validate_token") as mock_validate:
+        mock_validate.return_value = AccessAttributes(
+            roles=["admin"], teams=["ml-team"], projects=["llama-3"], namespaces=["research"]
+        )
+        response = k8s_client.get("/test", headers={"Authorization": f"Bearer {valid_token}"})
+        assert response.status_code == 200
+        assert response.json() == {"message": "Authentication successful"}
+
+
+@patch("kubernetes.client.ApiClient")
+def test_invalid_k8s_authentication(mock_api_client, k8s_client, invalid_token):
+    # Mock the Kubernetes client
+    mock_client = AsyncMock()
+    mock_api_client.return_value = mock_client
+
+    # Mock failed token validation by raising an exception
+    with patch("llama_stack.distribution.server.auth_providers.KubernetesAuthProvider.validate_token") as mock_validate:
+        mock_validate.side_effect = ValueError("Invalid or expired token")
+        response = k8s_client.get("/test", headers={"Authorization": f"Bearer {invalid_token}"})
+        assert response.status_code == 401
+        assert "Invalid or expired token" in response.json()["error"]["message"]
+
+
+@pytest.mark.asyncio
+async def test_k8s_middleware_with_access_attributes(mock_k8s_middleware, mock_scope):
+    middleware, mock_app = mock_k8s_middleware
+    mock_receive = AsyncMock()
+    mock_send = AsyncMock()
+
+    with patch("kubernetes.client.ApiClient") as mock_api_client:
+        mock_client = AsyncMock()
+        mock_api_client.return_value = mock_client
+
+        # Mock successful token validation
+        mock_client.set_default_header = AsyncMock()
+
+        # Mock token payload with access attributes
+        mock_token_parts = ["header", "eyJzdWIiOiJhZG1pbiIsImdyb3VwcyI6WyJtbC10ZWFtIl19", "signature"]
+        mock_scope["headers"][1] = (b"authorization", f"Bearer {'.'.join(mock_token_parts)}".encode())
+
+        await middleware(mock_scope, mock_receive, mock_send)
+
+        assert "user_attributes" in mock_scope
+        assert mock_scope["user_attributes"]["roles"] == ["admin"]
+        assert mock_scope["user_attributes"]["teams"] == ["ml-team"]
+
+        mock_app.assert_called_once_with(mock_scope, mock_receive, mock_send)
+
+
+@pytest.mark.asyncio
+async def test_k8s_middleware_no_attributes(mock_k8s_middleware, mock_scope):
+    """Test middleware behavior with no access attributes"""
+    middleware, mock_app = mock_k8s_middleware
+    mock_receive = AsyncMock()
+    mock_send = AsyncMock()
+
+    with patch("kubernetes.client.ApiClient") as mock_api_client:
+        mock_client = AsyncMock()
+        mock_api_client.return_value = mock_client
+
+        # Mock successful token validation
+        mock_client.set_default_header = AsyncMock()
+
+        # Mock token payload without access attributes
+        mock_token_parts = ["header", "eyJzdWIiOiJhZG1pbiJ9", "signature"]
+        mock_scope["headers"][1] = (b"authorization", f"Bearer {'.'.join(mock_token_parts)}".encode())
+
+        await middleware(mock_scope, mock_receive, mock_send)
+
+        assert "user_attributes" in mock_scope
+        attributes = mock_scope["user_attributes"]
+        assert "roles" in attributes
+        assert attributes["roles"] == ["admin"]
+
+        mock_app.assert_called_once_with(mock_scope, mock_receive, mock_send)
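The mock tokens in the Kubernetes tests above are not signed JWTs; their middle segments are plain base64-encoded JSON, which is what lets the middleware recover roles and groups without a real API server. A sketch of the decoding these mocks assume, using only the standard library:

# Sketch: decode the mock JWT payload segments used in the tests above.
import base64
import json

for payload in ("eyJzdWIiOiJhZG1pbiIsImdyb3VwcyI6WyJtbC10ZWFtIl19", "eyJzdWIiOiJhZG1pbiJ9"):
    padded = payload + "=" * (-len(payload) % 4)  # restore base64 padding
    print(json.loads(base64.b64decode(padded)))
# {'sub': 'admin', 'groups': ['ml-team']}
# {'sub': 'admin'}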
@@ -13,3 +13,5 @@ test_exclusions:
 - test_chat_non_streaming_image
 - test_chat_streaming_image
 - test_chat_multi_turn_multiple_images
+- test_response_non_streaming_image
+- test_response_non_streaming_multi_turn_image

@@ -13,3 +13,5 @@ test_exclusions:
 - test_chat_non_streaming_image
 - test_chat_streaming_image
 - test_chat_multi_turn_multiple_images
+- test_response_non_streaming_image
+- test_response_non_streaming_multi_turn_image

@@ -13,3 +13,5 @@ test_exclusions:
 - test_chat_non_streaming_image
 - test_chat_streaming_image
 - test_chat_multi_turn_multiple_images
+- test_response_non_streaming_image
+- test_response_non_streaming_multi_turn_image
@@ -16,7 +16,7 @@ Description:


 Configuration:
-- Provider details (models, display names) are loaded from `tests/verifications/config.yaml`.
+- Provider details (models, display names) are loaded from `tests/verifications/conf/*.yaml`.
 - Test cases are defined in YAML files within `tests/verifications/openai_api/fixtures/test_cases/`.
 - Test results are stored in `tests/verifications/test_results/`.
@@ -1,10 +1,15 @@
+# This is a temporary run file because model names used by the verification tests
+# are not quite consistent with various pre-existing distributions.
+#
 version: '2'
 image_name: openai-api-verification
 apis:
+- agents
 - inference
 - telemetry
 - tool_runtime
 - vector_io
+- safety
 providers:
   inference:
   - provider_id: together

@@ -16,12 +21,12 @@ providers:
     provider_type: remote::fireworks
     config:
       url: https://api.fireworks.ai/inference/v1
-      api_key: ${env.FIREWORKS_API_KEY}
+      api_key: ${env.FIREWORKS_API_KEY:}
   - provider_id: groq
     provider_type: remote::groq
     config:
       url: https://api.groq.com
-      api_key: ${env.GROQ_API_KEY}
+      api_key: ${env.GROQ_API_KEY:}
   - provider_id: openai
     provider_type: remote::openai
     config:

@@ -44,7 +49,20 @@ providers:
     config:
       service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/openai/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai-api-verification}/trace_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/agents_store.db
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
35
tests/verifications/openai_api/conftest.py
Normal file
@@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs


def pytest_generate_tests(metafunc):
    """Dynamically parametrize tests based on the selected provider and config."""
    if "model" in metafunc.fixturenames:
        provider = metafunc.config.getoption("provider")
        if not provider:
            print("Warning: --provider not specified. Skipping model parametrization.")
            metafunc.parametrize("model", [])
            return

        try:
            config_data = _load_all_verification_configs()
        except (FileNotFoundError, IOError) as e:
            print(f"ERROR loading verification configs: {e}")
            config_data = {"providers": {}}

        provider_config = config_data.get("providers", {}).get(provider)
        if provider_config:
            models = provider_config.get("models", [])
            if models:
                metafunc.parametrize("model", models)
            else:
                print(f"Warning: No models found for provider '{provider}' in config.")
                metafunc.parametrize("model", [])  # Parametrize empty if no models found
        else:
            print(f"Warning: Provider '{provider}' not found in config. No models parametrized.")
            metafunc.parametrize("model", [])  # Parametrize empty if provider not found
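With this hook in place, the suite is pointed at a provider from the command line; assuming the --provider option is registered by the verification suite's root conftest, an invocation would look like:

    pytest tests/verifications/openai_api --provider=openai

Each test that accepts the model fixture then runs once per model listed for that provider in its conf/*.yaml file.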
@@ -5,14 +5,16 @@
# the root directory of this source tree.

import os
import re
from pathlib import Path

import pytest
import yaml
from openai import OpenAI

# --- Helper Functions ---


# --- Helper Function to Load Config ---
def _load_all_verification_configs():
    """Load and aggregate verification configs from the conf/ directory."""
    # Note: Path is relative to *this* file (fixtures.py)

@@ -44,7 +46,30 @@ def _load_all_verification_configs():
    return {"providers": all_provider_configs}


# --- End Helper Function ---
def case_id_generator(case):
    """Generate a test ID from the case's 'case_id' field, or use a default."""
    case_id = case.get("case_id")
    if isinstance(case_id, (str, int)):
        return re.sub(r"\W|^(?=\d)", "_", str(case_id))
    return None


def should_skip_test(verification_config, provider, model, test_name_base):
    """Check if a test should be skipped based on config exclusions."""
    provider_config = verification_config.get("providers", {}).get(provider)
    if not provider_config:
        return False  # No config for provider, don't skip

    exclusions = provider_config.get("test_exclusions", {}).get(model, [])
    return test_name_base in exclusions


# Helper to get the base test name from the request object
def get_base_test_name(request):
    return request.node.originalname


# --- End Helper Functions ---


@pytest.fixture(scope="session")
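A quick note on case_id_generator: the regex turns any non-word character, and the position before a leading digit, into an underscore so the case_id becomes a pytest-friendly test ID. For example, "llama image" becomes llama_image and "2nd-turn" becomes _2nd_turn.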
@@ -0,0 +1,65 @@
test_response_basic:
  test_name: test_response_basic
  test_params:
    case:
    - case_id: "earth"
      input: "Which planet do humans live on?"
      output: "earth"
    - case_id: "saturn"
      input: "Which planet has rings around it with a name starting with letter S?"
      output: "saturn"

test_response_multi_turn:
  test_name: test_response_multi_turn
  test_params:
    case:
    - case_id: "earth"
      turns:
      - input: "Which planet do humans live on?"
        output: "earth"
      - input: "What is the name of the planet from your previous response?"
        output: "earth"

test_response_web_search:
  test_name: test_response_web_search
  test_params:
    case:
    - case_id: "llama_experts"
      input: "How many experts does the Llama 4 Maverick model have?"
      tools:
      - type: web_search
        search_context_size: "low"
      output: "128"

test_response_image:
  test_name: test_response_image
  test_params:
    case:
    - case_id: "llama_image"
      input:
      - role: user
        content:
        - type: input_text
          text: "Identify the type of animal in this image."
        - type: input_image
          image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
      output: "llama"

test_response_multi_turn_image:
  test_name: test_response_multi_turn_image
  test_params:
    case:
    - case_id: "llama_image_search"
      turns:
      - input:
        - role: user
          content:
          - type: input_text
            text: "What type of animal is in this image? Please respond with a single word that starts with the letter 'L'."
          - type: input_image
            image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
        output: "llama"
      - input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick' and 'scout'."
        tools:
        - type: web_search
        output: "model"
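For reference, load_test_cases (imported by the test modules in this diff from tests/verifications/openai_api/fixtures/load.py) plausibly resolves a name such as "responses" to one of these YAML files; a minimal sketch under that assumption, not the module's actual code:

import yaml
from pathlib import Path

def load_test_cases(name: str) -> dict:
    # fixtures/test_cases/<name>.yaml, relative to the fixtures package.
    path = Path(__file__).parent / "test_cases" / f"{name}.yaml"
    return yaml.safe_load(path.read_text())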
@@ -7,7 +7,6 @@
import base64
import copy
import json
import re
from pathlib import Path
from typing import Any

@@ -16,7 +15,9 @@ from openai import APIError
from pydantic import BaseModel

from tests.verifications.openai_api.fixtures.fixtures import (
    _load_all_verification_configs,
    case_id_generator,
    get_base_test_name,
    should_skip_test,
)
from tests.verifications.openai_api.fixtures.load import load_test_cases

@@ -25,57 +26,6 @@ chat_completion_test_cases = load_test_cases("chat_completion")
THIS_DIR = Path(__file__).parent


def case_id_generator(case):
    """Generate a test ID from the case's 'case_id' field, or use a default."""
    case_id = case.get("case_id")
    if isinstance(case_id, (str, int)):
        return re.sub(r"\W|^(?=\d)", "_", str(case_id))
    return None


def pytest_generate_tests(metafunc):
    """Dynamically parametrize tests based on the selected provider and config."""
    if "model" in metafunc.fixturenames:
        provider = metafunc.config.getoption("provider")
        if not provider:
            print("Warning: --provider not specified. Skipping model parametrization.")
            metafunc.parametrize("model", [])
            return

        try:
            config_data = _load_all_verification_configs()
        except (FileNotFoundError, IOError) as e:
            print(f"ERROR loading verification configs: {e}")
            config_data = {"providers": {}}

        provider_config = config_data.get("providers", {}).get(provider)
        if provider_config:
            models = provider_config.get("models", [])
            if models:
                metafunc.parametrize("model", models)
            else:
                print(f"Warning: No models found for provider '{provider}' in config.")
                metafunc.parametrize("model", [])  # Parametrize empty if no models found
        else:
            print(f"Warning: Provider '{provider}' not found in config. No models parametrized.")
            metafunc.parametrize("model", [])  # Parametrize empty if provider not found


def should_skip_test(verification_config, provider, model, test_name_base):
    """Check if a test should be skipped based on config exclusions."""
    provider_config = verification_config.get("providers", {}).get(provider)
    if not provider_config:
        return False  # No config for provider, don't skip

    exclusions = provider_config.get("test_exclusions", {}).get(model, [])
    return test_name_base in exclusions


# Helper to get the base test name from the request object
def get_base_test_name(request):
    return request.node.originalname


@pytest.fixture
def multi_image_data():
    files = [
166
tests/verifications/openai_api/test_responses.py
Normal file
@@ -0,0 +1,166 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


import pytest

from tests.verifications.openai_api.fixtures.fixtures import (
    case_id_generator,
    get_base_test_name,
    should_skip_test,
)
from tests.verifications.openai_api.fixtures.load import load_test_cases

responses_test_cases = load_test_cases("responses")


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_basic"]["test_params"]["case"],
    ids=case_id_generator,
)
def test_response_non_streaming_basic(request, openai_client, model, provider, verification_config, case):
    test_name_base = get_base_test_name(request)
    if should_skip_test(verification_config, provider, model, test_name_base):
        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

    response = openai_client.responses.create(
        model=model,
        input=case["input"],
        stream=False,
    )
    output_text = response.output_text.lower().strip()
    assert len(output_text) > 0
    assert case["output"].lower() in output_text

    retrieved_response = openai_client.responses.retrieve(response_id=response.id)
    assert retrieved_response.output_text == response.output_text

    next_response = openai_client.responses.create(
        model=model, input="Repeat your previous response in all caps.", previous_response_id=response.id
    )
    next_output_text = next_response.output_text.strip()
    assert case["output"].upper() in next_output_text


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_basic"]["test_params"]["case"],
    ids=case_id_generator,
)
def test_response_streaming_basic(request, openai_client, model, provider, verification_config, case):
    test_name_base = get_base_test_name(request)
    if should_skip_test(verification_config, provider, model, test_name_base):
        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

    response = openai_client.responses.create(
        model=model,
        input=case["input"],
        stream=True,
    )
    streamed_content = []
    response_id = ""
    for chunk in response:
        if chunk.type == "response.completed":
            response_id = chunk.response.id
            streamed_content.append(chunk.response.output_text.strip())

    assert len(streamed_content) > 0
    assert case["output"].lower() in "".join(streamed_content).lower()

    retrieved_response = openai_client.responses.retrieve(response_id=response_id)
    assert retrieved_response.output_text == "".join(streamed_content)


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_multi_turn"]["test_params"]["case"],
    ids=case_id_generator,
)
def test_response_non_streaming_multi_turn(request, openai_client, model, provider, verification_config, case):
    test_name_base = get_base_test_name(request)
    if should_skip_test(verification_config, provider, model, test_name_base):
        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

    previous_response_id = None
    for turn in case["turns"]:
        response = openai_client.responses.create(
            model=model,
            input=turn["input"],
            previous_response_id=previous_response_id,
            tools=turn["tools"] if "tools" in turn else None,
        )
        previous_response_id = response.id
        output_text = response.output_text.lower()
        assert turn["output"].lower() in output_text


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_web_search"]["test_params"]["case"],
    ids=case_id_generator,
)
def test_response_non_streaming_web_search(request, openai_client, model, provider, verification_config, case):
    test_name_base = get_base_test_name(request)
    if should_skip_test(verification_config, provider, model, test_name_base):
        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

    response = openai_client.responses.create(
        model=model,
        input=case["input"],
        tools=case["tools"],
        stream=False,
    )
    assert len(response.output) > 1
    assert response.output[0].type == "web_search_call"
    assert response.output[0].status == "completed"
    assert response.output[1].type == "message"
    assert response.output[1].status == "completed"
    assert response.output[1].role == "assistant"
    assert len(response.output[1].content) > 0
    assert case["output"].lower() in response.output_text.lower().strip()


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_image"]["test_params"]["case"],
    ids=case_id_generator,
)
def test_response_non_streaming_image(request, openai_client, model, provider, verification_config, case):
    test_name_base = get_base_test_name(request)
    if should_skip_test(verification_config, provider, model, test_name_base):
        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

    response = openai_client.responses.create(
        model=model,
        input=case["input"],
        stream=False,
    )
    output_text = response.output_text.lower()
    assert case["output"].lower() in output_text


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_multi_turn_image"]["test_params"]["case"],
    ids=case_id_generator,
)
def test_response_non_streaming_multi_turn_image(request, openai_client, model, provider, verification_config, case):
    test_name_base = get_base_test_name(request)
    if should_skip_test(verification_config, provider, model, test_name_base):
        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

    previous_response_id = None
    for turn in case["turns"]:
        response = openai_client.responses.create(
            model=model,
            input=turn["input"],
            previous_response_id=previous_response_id,
            tools=turn["tools"] if "tools" in turn else None,
        )
        previous_response_id = response.id
        output_text = response.output_text.lower()
        assert turn["output"].lower() in output_text
125
uv.lock
generated
@@ -1,5 +1,4 @@
version = 1
revision = 1
requires-python = ">=3.10"
resolution-markers = [
    "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')",

@@ -676,6 +675,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 },
]

[[package]]
name = "durationpy"
version = "0.9"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/31/e9/f49c4e7fccb77fa5c43c2480e09a857a78b41e7331a75e128ed5df45c56b/durationpy-0.9.tar.gz", hash = "sha256:fd3feb0a69a0057d582ef643c355c40d2fa1c942191f914d12203b1a01ac722a", size = 3186 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/4c/a3/ac312faeceffd2d8f86bc6dcb5c401188ba5a01bc88e69bed97578a0dfcd/durationpy-0.9-py3-none-any.whl", hash = "sha256:e65359a7af5cedad07fb77a2dd3f390f8eb0b74cb845589fa6c057086834dd38", size = 3461 },
]

[[package]]
name = "exceptiongroup"
version = "1.2.2"

@@ -842,6 +850,20 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599 },
]

[[package]]
name = "google-auth"
version = "2.38.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "cachetools" },
    { name = "pyasn1-modules" },
    { name = "rsa" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c6/eb/d504ba1daf190af6b204a9d4714d457462b486043744901a6eeea711f913/google_auth-2.38.0.tar.gz", hash = "sha256:8285113607d3b80a3f1543b75962447ba8a09fe85783432a784fdeef6ac094c4", size = 270866 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/9d/47/603554949a37bca5b7f894d51896a9c534b9eab808e2520a748e081669d0/google_auth-2.38.0-py2.py3-none-any.whl", hash = "sha256:e7dae6694313f434a2727bf2906f27ad259bae090d7aa896590d86feec3d9d4a", size = 210770 },
]

[[package]]
name = "googleapis-common-protos"
version = "1.67.0"

@@ -1289,6 +1311,28 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/c9/fb/108ecd1fe961941959ad0ee4e12ee7b8b1477247f30b1fdfd83ceaf017f0/jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409", size = 28965 },
]

[[package]]
name = "kubernetes"
version = "32.0.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "certifi" },
    { name = "durationpy" },
    { name = "google-auth" },
    { name = "oauthlib" },
    { name = "python-dateutil" },
    { name = "pyyaml" },
    { name = "requests" },
    { name = "requests-oauthlib" },
    { name = "six" },
    { name = "urllib3" },
    { name = "websocket-client" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b7/e8/0598f0e8b4af37cd9b10d8b87386cf3173cb8045d834ab5f6ec347a758b3/kubernetes-32.0.1.tar.gz", hash = "sha256:42f43d49abd437ada79a79a16bd48a604d3471a117a8347e87db693f2ba0ba28", size = 946691 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/08/10/9f8af3e6f569685ce3af7faab51c8dd9d93b9c38eba339ca31c746119447/kubernetes-32.0.1-py2.py3-none-any.whl", hash = "sha256:35282ab8493b938b08ab5526c7ce66588232df00ef5e1dbe88a419107dc10998", size = 1988070 },
]

[[package]]
name = "levenshtein"
version = "0.27.1"

@@ -1374,7 +1418,7 @@ wheels = [

[[package]]
name = "llama-stack"
version = "0.2.1"
version = "0.2.4"
source = { editable = "." }
dependencies = [
    { name = "blobfile" },

@@ -1384,6 +1428,7 @@ dependencies = [
    { name = "huggingface-hub" },
    { name = "jinja2" },
    { name = "jsonschema" },
    { name = "kubernetes" },
    { name = "llama-stack-client" },
    { name = "openai" },
    { name = "pillow" },

@@ -1485,8 +1530,9 @@ requires-dist = [
    { name = "jinja2", specifier = ">=3.1.6" },
    { name = "jinja2", marker = "extra == 'codegen'", specifier = ">=3.1.6" },
    { name = "jsonschema" },
    { name = "llama-stack-client", specifier = ">=0.2.2" },
    { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.1" },
    { name = "kubernetes" },
    { name = "llama-stack-client", specifier = ">=0.2.4" },
    { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.4" },
    { name = "mcp", marker = "extra == 'test'" },
    { name = "myst-parser", marker = "extra == 'docs'" },
    { name = "nbval", marker = "extra == 'dev'" },

@@ -1538,11 +1584,10 @@ requires-dist = [
    { name = "types-setuptools", marker = "extra == 'dev'" },
    { name = "uvicorn", marker = "extra == 'dev'" },
]
provides-extras = ["dev", "unit", "test", "docs", "codegen", "ui"]

[[package]]
name = "llama-stack-client"
version = "0.2.2"
version = "0.2.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "anyio" },

@@ -1559,9 +1604,9 @@ dependencies = [
    { name = "tqdm" },
    { name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/fc/1c/7d3ab0e57195f21f9cf121fba2692ee8dc792793e5c82aa702602dda9bea/llama_stack_client-0.2.2.tar.gz", hash = "sha256:a0323b18b9f68172c639755652654452b7e72e28e77d95db5146e25d83002d34", size = 241914 }
sdist = { url = "https://files.pythonhosted.org/packages/d9/bd/bbbac1a766f33f947bd105338a2a469ef3a9faef78da20436f3f5d0adc95/llama_stack_client-0.2.4.tar.gz", hash = "sha256:51df03c7172739c37c222fb25072ee5f1f2943037d1e23336eb7c2408a294825", size = 254328 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/9e/68/bdd9cb19e2c151d9aa8bf91444dfa9675bc7913006d8e1e030fb79dbf8c5/llama_stack_client-0.2.2-py3-none-any.whl", hash = "sha256:2a4ef3edb861e9a3a734e6e5e65d9d3de1f10cd56c18d21d82253088d2758e53", size = 273307 },
    { url = "https://files.pythonhosted.org/packages/14/85/9f8bf39a9201be82d32e1cdb03629b552bcc94bb3348e0f154c0e20a2c43/llama_stack_client-0.2.4-py3-none-any.whl", hash = "sha256:7541c6179e9afd5a1a94eed4d151a76d10869e3bde2506b16a9bdb52fc0a7a84", size = 292723 },
]

[[package]]

@@ -2022,6 +2067,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/17/7f/d322a4125405920401450118dbdc52e0384026bd669939484670ce8b2ab9/numpy-2.2.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:783145835458e60fa97afac25d511d00a1eca94d4a8f3ace9fe2043003c678e4", size = 12839607 },
]

[[package]]
name = "oauthlib"
version = "3.2.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/6d/fa/fbf4001037904031639e6bfbfc02badfc7e12f137a8afa254df6c4c8a670/oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918", size = 177352 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/7e/80/cab10959dc1faead58dc8384a781dfbf93cb4d33d50988f7a69f1b7c9bbe/oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca", size = 151688 },
]

[[package]]
name = "openai"
version = "1.71.0"

@@ -2525,6 +2579,27 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/ed/bd/54907846383dcc7ee28772d7e646f6c34276a17da740002a5cefe90f04f7/pyarrow-19.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:58d9397b2e273ef76264b45531e9d552d8ec8a6688b7390b5be44c02a37aade8", size = 42085744 },
]

[[package]]
name = "pyasn1"
version = "0.6.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ba/e9/01f1a64245b89f039897cb0130016d79f77d52669aae6ee7b159a6c4c018/pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034", size = 145322 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/c8/f1/d6a797abb14f6283c0ddff96bbdd46937f64122b8c925cab503dd37f8214/pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629", size = 83135 },
]

[[package]]
name = "pyasn1-modules"
version = "0.4.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "pyasn1" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259 },
]

[[package]]
name = "pycparser"
version = "2.22"

@@ -3135,6 +3210,19 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 },
]

[[package]]
name = "requests-oauthlib"
version = "2.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "oauthlib" },
    { name = "requests" },
]
sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179 },
]

[[package]]
name = "rich"
version = "13.9.4"

@@ -3234,6 +3322,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/9f/2e/c5c1689e80298d4e94c75b70faada4c25445739d91b94c211244a3ed7ed1/rpds_py-0.22.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:177c7c0fce2855833819c98e43c262007f42ce86651ffbb84f37883308cb0e7d", size = 233338 },
]

[[package]]
name = "rsa"
version = "4.9"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "pyasn1" },
]
sdist = { url = "https://files.pythonhosted.org/packages/aa/65/7d973b89c4d2351d7fb232c2e452547ddfa243e93131e7cfa766da627b52/rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21", size = 29711 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/49/97/fa78e3d2f65c02c8e1268b9aba606569fe97f6c8f7c2d74394553347c145/rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7", size = 34315 },
]

[[package]]
name = "ruamel-yaml"
version = "0.18.10"

@@ -4109,6 +4209,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 },
]

[[package]]
name = "websocket-client"
version = "1.8.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e6/30/fba0d96b4b5fbf5948ed3f4681f7da2f9f64512e1d303f94b4cc174c24a5/websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da", size = 54648 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 },
]

[[package]]
name = "websockets"
version = "15.0"