Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-04 04:04:14 +00:00)

commit 354ed48598: Merge branch 'main' into content-extension

227 changed files with 21224 additions and 10798 deletions

.github/actions/run-and-record-tests/action.yml (30 lines changed)

@@ -2,13 +2,6 @@ name: 'Run and Record Tests'
 description: 'Run integration tests and handle recording/artifact upload'
 
 inputs:
-  test-subdirs:
-    description: 'Comma-separated list of test subdirectories to run'
-    required: true
-  test-pattern:
-    description: 'Regex pattern to pass to pytest -k'
-    required: false
-    default: ''
   stack-config:
     description: 'Stack configuration to use'
     required: true
@@ -18,10 +11,18 @@ inputs:
   inference-mode:
     description: 'Inference mode (record or replay)'
     required: true
-  run-vision-tests:
-    description: 'Whether to run vision tests'
+  test-suite:
+    description: 'Test suite to use: base, responses, vision, etc.'
     required: false
-    default: 'false'
+    default: ''
+  test-subdirs:
+    description: 'Comma-separated list of test subdirectories to run; overrides test-suite'
+    required: false
+    default: ''
+  test-pattern:
+    description: 'Regex pattern to pass to pytest -k'
+    required: false
+    default: ''
 
 runs:
   using: 'composite'
@@ -42,7 +43,7 @@ runs:
           --test-subdirs '${{ inputs.test-subdirs }}' \
           --test-pattern '${{ inputs.test-pattern }}' \
           --inference-mode '${{ inputs.inference-mode }}' \
-          ${{ inputs.run-vision-tests == 'true' && '--run-vision-tests' || '' }} \
+          --test-suite '${{ inputs.test-suite }}' \
           | tee pytest-${{ inputs.inference-mode }}.log
 
 
@@ -57,12 +58,7 @@ runs:
         echo "New recordings detected, committing and pushing"
         git add tests/integration/recordings/
-
-        if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
-          git commit -m "Recordings update from CI (vision)"
-        else
-          git commit -m "Recordings update from CI"
-        fi
+        git commit -m "Recordings update from CI (test-suite: ${{ inputs.test-suite }})"
 
         git fetch origin ${{ github.ref_name }}
         git rebase origin/${{ github.ref_name }}
         echo "Rebased successfully"

.github/actions/setup-ollama/action.yml (8 lines changed)

@@ -1,17 +1,17 @@
 name: Setup Ollama
 description: Start Ollama
 inputs:
-  run-vision-tests:
-    description: 'Run vision tests: "true" or "false"'
+  test-suite:
+    description: 'Test suite to use: base, responses, vision, etc.'
     required: false
-    default: 'false'
+    default: ''
 runs:
   using: "composite"
   steps:
     - name: Start Ollama
       shell: bash
       run: |
-        if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
+        if [ "${{ inputs.test-suite }}" == "vision" ]; then
           image="ollama-with-vision-model"
         else
           image="ollama-with-models"

@@ -12,10 +12,10 @@ inputs:
     description: 'Provider to setup (ollama or vllm)'
     required: true
     default: 'ollama'
-  run-vision-tests:
-    description: 'Whether to setup provider for vision tests'
+  test-suite:
+    description: 'Test suite to use: base, responses, vision, etc.'
     required: false
-    default: 'false'
+    default: ''
   inference-mode:
     description: 'Inference mode (record or replay)'
     required: true

@@ -33,7 +33,7 @@ runs:
     if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }}
     uses: ./.github/actions/setup-ollama
     with:
-      run-vision-tests: ${{ inputs.run-vision-tests }}
+      test-suite: ${{ inputs.test-suite }}
 
   - name: Setup vllm
     if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }}

.github/workflows/README.md (3 lines changed)

@@ -5,10 +5,11 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
 | Name | File | Purpose |
 | ---- | ---- | ------- |
 | Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md |
+| API Conformance Tests | [conformance.yml](conformance.yml) | Run the API Conformance test suite on the changes. |
 | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
 | Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication |
 | SqlStore Integration Tests | [integration-sql-store-tests.yml](integration-sql-store-tests.yml) | Run the integration test suite with SqlStore |
-| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration in replay mode |
+| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suites from tests/integration in replay mode |
 | Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
 | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
 | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |

.github/workflows/conformance.yml (new file, 57 lines)

@@ -0,0 +1,57 @@
+# API Conformance Tests
+# This workflow ensures that API changes maintain backward compatibility and don't break existing integrations
+# It runs schema validation and OpenAPI diff checks to catch breaking changes early
+
+name: API Conformance Tests
+
+run-name: Run the API Conformance test suite on the changes.
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+    types: [opened, synchronize, reopened]
+    paths:
+      - 'llama_stack/**'
+      - '!llama_stack/ui/**'
+      - 'tests/**'
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - '.github/workflows/conformance.yml' # This workflow itself
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
+  # Cancel in-progress runs when new commits are pushed to avoid wasting CI resources
+  cancel-in-progress: true
+
+jobs:
+  # Job to check if API schema changes maintain backward compatibility
+  check-schema-compatibility:
+    runs-on: ubuntu-latest
+    steps:
+      # Using specific version 4.1.7 because 5.0.0 fails when trying to run this locally using `act`
+      # This ensures consistent behavior between local testing and CI
+      - name: Checkout PR Code
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+
+      # Checkout the base branch to compare against (usually main)
+      # This allows us to diff the current changes against the previous state
+      - name: Checkout Base Branch
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        with:
+          ref: ${{ github.event.pull_request.base.ref }}
+          path: 'base'
+
+      # Install oasdiff: https://github.com/oasdiff/oasdiff, a tool for detecting breaking changes in OpenAPI specs.
+      - name: Install oasdiff
+        run: |
+          curl -fsSL https://raw.githubusercontent.com/oasdiff/oasdiff/main/install.sh | sh
+
+      # Run oasdiff to detect breaking changes in the API specification
+      # This step will fail if incompatible changes are detected, preventing breaking changes from being merged
+      - name: Run OpenAPI Breaking Change Diff
+        run: |
+          oasdiff breaking --fail-on ERR base/docs/_static/llama-stack-spec.yaml docs/_static/llama-stack-spec.yaml --match-path '^/v1/openai/v1' \
+            --match-path '^/v1/vector-io' \
+            --match-path '^/v1/vector-dbs'
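
To reproduce the conformance check locally, something like the following sketch should work; it mirrors the CI step above and assumes `oasdiff` is on your PATH and that the base branch spec has been checked out under `base/`, as the workflow does:

```python
import subprocess

# Mirror of the workflow's oasdiff step: compare the base-branch OpenAPI spec
# against the working-tree spec and report breaking changes. The flags and
# paths are taken from the workflow above.
result = subprocess.run(
    [
        "oasdiff", "breaking", "--fail-on", "ERR",
        "base/docs/_static/llama-stack-spec.yaml",
        "docs/_static/llama-stack-spec.yaml",
        "--match-path", "^/v1/openai/v1",
        "--match-path", "^/v1/vector-io",
        "--match-path", "^/v1/vector-dbs",
    ],
    capture_output=True,
    text=True,
)
if result.returncode != 0:
    print("Breaking API changes detected:")
    print(result.stdout or result.stderr)
```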

.github/workflows/integration-tests.yml (20 lines changed)

@@ -1,6 +1,6 @@
 name: Integration Tests (Replay)
 
-run-name: Run the integration test suite from tests/integration in replay mode
+run-name: Run the integration test suites from tests/integration in replay mode
 
 on:
   push:
@@ -32,14 +32,6 @@ on:
       description: 'Test against a specific provider'
       type: string
       default: 'ollama'
-    test-subdirs:
-      description: 'Comma-separated list of test subdirectories to run'
-      type: string
-      default: ''
-    test-pattern:
-      description: 'Regex pattern to pass to pytest -k'
-      type: string
-      default: ''
 
 concurrency:
   # Skip concurrency for pushes to main - each commit should be tested independently
@@ -50,7 +42,7 @@ jobs:
 
   run-replay-mode-tests:
     runs-on: ubuntu-latest
-    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }}
+    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.test-suite) }}
 
     strategy:
       fail-fast: false
@@ -61,7 +53,7 @@ jobs:
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
-        run-vision-tests: [true, false]
+        test-suite: [base, vision]
 
     steps:
       - name: Checkout repository
@@ -73,15 +65,13 @@ jobs:
          python-version: ${{ matrix.python-version }}
          client-version: ${{ matrix.client-version }}
          provider: ${{ matrix.provider }}
-          run-vision-tests: ${{ matrix.run-vision-tests }}
+          test-suite: ${{ matrix.test-suite }}
          inference-mode: 'replay'

      - name: Run tests
        uses: ./.github/actions/run-and-record-tests
        with:
-          test-subdirs: ${{ inputs.test-subdirs }}
-          test-pattern: ${{ inputs.test-pattern }}
          stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
          provider: ${{ matrix.provider }}
          inference-mode: 'replay'
-          run-vision-tests: ${{ matrix.run-vision-tests }}
+          test-suite: ${{ matrix.test-suite }}

.github/workflows/record-integration-tests.yml (32 lines changed)

@@ -10,18 +10,18 @@ run-name: Run the integration test suite from tests/integration
 on:
   workflow_dispatch:
     inputs:
-      test-subdirs:
-        description: 'Comma-separated list of test subdirectories to run'
-        type: string
-        default: ''
       test-provider:
         description: 'Test against a specific provider'
         type: string
         default: 'ollama'
-      run-vision-tests:
-        description: 'Whether to run vision tests'
-        type: boolean
-        default: false
+      test-suite:
+        description: 'Test suite to use: base, responses, vision, etc.'
+        type: string
+        default: ''
+      test-subdirs:
+        description: 'Comma-separated list of test subdirectories to run; overrides test-suite'
+        type: string
+        default: ''
       test-pattern:
         description: 'Regex pattern to pass to pytest -k'
         type: string
@@ -38,11 +38,11 @@ jobs:
      - name: Echo workflow inputs
        run: |
          echo "::group::Workflow Inputs"
-          echo "test-subdirs: ${{ inputs.test-subdirs }}"
-          echo "test-provider: ${{ inputs.test-provider }}"
-          echo "run-vision-tests: ${{ inputs.run-vision-tests }}"
-          echo "test-pattern: ${{ inputs.test-pattern }}"
          echo "branch: ${{ github.ref_name }}"
+          echo "test-provider: ${{ inputs.test-provider }}"
+          echo "test-suite: ${{ inputs.test-suite }}"
+          echo "test-subdirs: ${{ inputs.test-subdirs }}"
+          echo "test-pattern: ${{ inputs.test-pattern }}"
          echo "::endgroup::"

      - name: Checkout repository
@@ -56,15 +56,15 @@ jobs:
          python-version: "3.12"  # Use single Python version for recording
          client-version: "latest"
          provider: ${{ inputs.test-provider || 'ollama' }}
-          run-vision-tests: ${{ inputs.run-vision-tests }}
+          test-suite: ${{ inputs.test-suite }}
          inference-mode: 'record'

      - name: Run and record tests
        uses: ./.github/actions/run-and-record-tests
        with:
-          test-pattern: ${{ inputs.test-pattern }}
-          test-subdirs: ${{ inputs.test-subdirs }}
          stack-config: 'server:ci-tests'  # recording must be done with server since more tests are run
          provider: ${{ inputs.test-provider || 'ollama' }}
          inference-mode: 'record'
-          run-vision-tests: ${{ inputs.run-vision-tests }}
+          test-suite: ${{ inputs.test-suite }}
+          test-subdirs: ${{ inputs.test-subdirs }}
+          test-pattern: ${{ inputs.test-pattern }}

.gitignore (2 lines changed)

@@ -26,5 +26,7 @@ venv/
 pytest-report.xml
 .coverage
 .python-version
+AGENTS.md
+server.log
 CLAUDE.md
 .claude/

.pre-commit-config.yaml

@@ -86,7 +86,7 @@ repos:
        language: python
        pass_filenames: false
        require_serial: true
-        files: ^llama_stack/templates/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
+        files: ^llama_stack/distributions/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
      - id: provider-codegen
        name: Provider Codegen
        additional_dependencies:

docs/_static/llama-stack-spec.html (26 lines changed)

@@ -4129,7 +4129,7 @@
           "tags": [
             "Files"
           ],
-          "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.",
+          "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = \"created_at\", expires_after[seconds] = <int>. Seconds must be between 3600 and 2592000 (1 hour to 30 days).",
           "parameters": [],
           "requestBody": {
             "content": {
@@ -4143,11 +4143,33 @@
                   },
                   "purpose": {
                     "$ref": "#/components/schemas/OpenAIFilePurpose"
+                  },
+                  "expires_after_anchor": {
+                    "oneOf": [
+                      {
+                        "type": "string"
+                      },
+                      {
+                        "type": "null"
+                      }
+                    ]
+                  },
+                  "expires_after_seconds": {
+                    "oneOf": [
+                      {
+                        "type": "integer"
+                      },
+                      {
+                        "type": "null"
+                      }
+                    ]
                   }
                 },
                 "required": [
                   "file",
-                  "purpose"
+                  "purpose",
+                  "expires_after_anchor",
+                  "expires_after_seconds"
                 ]
               }
             }

docs/_static/llama-stack-spec.yaml (14 lines changed)

@@ -2933,6 +2933,10 @@ paths:
         - file: The File object (not file name) to be uploaded.
 
         - purpose: The intended purpose of the uploaded file.
+
+        - expires_after: Optional form values describing expiration for the file.
+          Expected expires_after[anchor] = "created_at", expires_after[seconds] = <int>.
+          Seconds must be between 3600 and 2592000 (1 hour to 30 days).
       parameters: []
       requestBody:
         content:
@@ -2945,9 +2949,19 @@ paths:
                   format: binary
                 purpose:
                   $ref: '#/components/schemas/OpenAIFilePurpose'
+                expires_after_anchor:
+                  oneOf:
+                    - type: string
+                    - type: 'null'
+                expires_after_seconds:
+                  oneOf:
+                    - type: integer
+                    - type: 'null'
               required:
                 - file
                 - purpose
+                - expires_after_anchor
+                - expires_after_seconds
         required: true
   /v1/openai/v1/models:
     get:

@@ -40,18 +40,15 @@ The system patches OpenAI and Ollama client methods to intercept calls before th
 
 ### Storage Architecture
 
-Recordings use a two-tier storage system optimized for both speed and debuggability:
+Recordings are stored as JSON files in the recording directory. They are looked up by their request hash.
 
 ```
 recordings/
-├── index.sqlite          # Fast lookup by request hash
 └── responses/
     ├── abc123def456.json  # Individual response files
     └── def789ghi012.json
 ```
 
-**SQLite index** enables O(log n) hash lookups and metadata queries without loading response bodies.
-
 **JSON files** store complete request/response pairs in human-readable format for debugging.
 
 ## Recording Modes
@@ -166,8 +163,8 @@ This preserves type safety - when replayed, you get the same Pydantic objects wi
 Control recording behavior globally:
 
 ```bash
-export LLAMA_STACK_TEST_INFERENCE_MODE=replay
-export LLAMA_STACK_TEST_RECORDING_DIR=/path/to/recordings
+export LLAMA_STACK_TEST_INFERENCE_MODE=replay  # this is the default
+export LLAMA_STACK_TEST_RECORDING_DIR=/path/to/recordings  # default is tests/integration/recordings
 pytest tests/integration/
 ```
 
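
Roughly, the lookup described above amounts to hashing the request and reading a JSON file. A minimal sketch of the idea follows; the exact normalization and hash scheme the stack uses is an assumption here, not something taken from this diff:

```python
import hashlib
import json
from pathlib import Path

RECORDING_DIR = Path("tests/integration/recordings")  # default location per the docs above


def request_hash(method: str, url: str, body: dict) -> str:
    # Assumed normalization: hash the canonical JSON of the request.
    # The real scheme may differ; this only illustrates the lookup flow.
    canonical = json.dumps({"method": method, "url": url, "body": body}, sort_keys=True)
    return hashlib.sha256(canonical.encode()).hexdigest()


def lookup_recording(method: str, url: str, body: dict) -> dict | None:
    # Recordings live as individual JSON files keyed by (a prefix of) the hash.
    path = RECORDING_DIR / "responses" / f"{request_hash(method, url, body)[:12]}.json"
    return json.loads(path.read_text()) if path.exists() else None
```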

@@ -3,6 +3,7 @@ image_name: kubernetes-benchmark-demo
 apis:
 - agents
 - inference
+- safety
 - telemetry
 - tool_runtime
 - vector_io
@@ -30,6 +31,11 @@ providers:
       db: ${env.POSTGRES_DB:=llamastack}
       user: ${env.POSTGRES_USER:=llamastack}
       password: ${env.POSTGRES_PASSWORD:=llamastack}
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
   agents:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -95,6 +101,8 @@ models:
 - model_id: ${env.INFERENCE_MODEL}
   provider_id: vllm-inference
   model_type: llm
+shields:
+- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
 vector_dbs: []
 datasets: []
 scoring_fns: []

@@ -50,6 +50,7 @@ The following models are available by default:
 - `meta/llama-3.2-11b-vision-instruct `
 - `meta/llama-3.2-90b-vision-instruct `
 - `meta/llama-3.3-70b-instruct `
+- `nvidia/vila `
 - `nvidia/llama-3.2-nv-embedqa-1b-v2 `
 - `nvidia/nv-embedqa-e5-v5 `
 - `nvidia/nv-embedqa-mistral-7b-v2 `

@@ -18,12 +18,13 @@ embedding_model_id = (
 ).identifier
 embedding_dimension = em.metadata["embedding_dimension"]
 
-_ = client.vector_dbs.register(
+vector_db = client.vector_dbs.register(
     vector_db_id=vector_db_id,
     embedding_model=embedding_model_id,
     embedding_dimension=embedding_dimension,
     provider_id="faiss",
 )
+vector_db_id = vector_db.identifier
 source = "https://www.paulgraham.com/greatwork.html"
 print("rag_tool> Ingesting document:", source)
 document = RAGDocument(
@@ -35,7 +36,7 @@ document = RAGDocument(
 client.tool_runtime.rag_tool.insert(
     documents=[document],
     vector_db_id=vector_db_id,
-    chunk_size_in_tokens=50,
+    chunk_size_in_tokens=100,
 )
 agent = Agent(
     client,

@@ -15,8 +15,8 @@ AWS Bedrock inference provider for accessing various AI models through AWS's man
 | `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE |
 | `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS |
 | `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE |
-| `connect_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
-| `read_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. |
+| `connect_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
+| `read_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. |
 | `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). |
 
 ## Sample Configuration

@@ -15,8 +15,8 @@ AWS Bedrock safety provider for content moderation using AWS's safety services.
 | `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE |
 | `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS |
 | `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE |
-| `connect_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
-| `read_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. |
+| `connect_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
+| `read_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. |
 | `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). |
 
 ## Sample Configuration

@@ -12,6 +12,60 @@ That means you'll get fast and efficient vector retrieval.
 - Easy to use
 - Fully integrated with Llama Stack
 
+There are three implementations of search for PGVectorIndex available:
+
+1. Vector Search:
+- How it works:
+  - Uses PostgreSQL's vector extension (pgvector) to perform similarity search
+  - Compares query embeddings against stored embeddings using Cosine distance or other distance metrics
+  - Eg. SQL query: SELECT document, embedding <=> %s::vector AS distance FROM table ORDER BY distance
+
+- Characteristics:
+  - Semantic understanding - finds documents similar in meaning even if they don't share keywords
+  - Works with high-dimensional vector embeddings (typically 768, 1024, or higher dimensions)
+  - Best for: Finding conceptually related content, handling synonyms, cross-language search
+
+2. Keyword Search
+- How it works:
+  - Uses PostgreSQL's full-text search capabilities with tsvector and ts_rank
+  - Converts text to searchable tokens using to_tsvector('english', text). Default language is English.
+  - Eg. SQL query: SELECT document, ts_rank(tokenized_content, plainto_tsquery('english', %s)) AS score
+
+- Characteristics:
+  - Lexical matching - finds exact keyword matches and variations
+  - Uses GIN (Generalized Inverted Index) for fast text search performance
+  - Scoring: Uses PostgreSQL's ts_rank function for relevance scoring
+  - Best for: Exact term matching, proper names, technical terms, Boolean-style queries
+
+3. Hybrid Search
+- How it works:
+  - Combines both vector and keyword search results
+  - Runs both searches independently, then merges results using configurable reranking
+
+- Two reranking strategies available:
+  - Reciprocal Rank Fusion (RRF) - (default: 60.0)
+  - Weighted Average - (default: 0.5)
+
+- Characteristics:
+  - Best of both worlds: semantic understanding + exact matching
+  - Documents appearing in both searches get boosted scores
+  - Configurable balance between semantic and lexical matching
+  - Best for: General-purpose search where you want both precision and recall
+
+4. Database Schema
+The PGVector implementation stores data optimized for all three search types:
+CREATE TABLE vector_store_xxx (
+    id TEXT PRIMARY KEY,
+    document JSONB,               -- Original document
+    embedding vector(dimension),  -- For vector search
+    content_text TEXT,            -- Raw text content
+    tokenized_content TSVECTOR    -- For keyword search
+);
+
+-- Indexes for performance
+CREATE INDEX content_gin_idx ON table USING GIN(tokenized_content);  -- Keyword search
+-- Vector index created automatically by pgvector
+
 ## Usage
 
 To use PGVector in your Llama Stack project, follow these steps:
@@ -20,6 +74,25 @@ To use PGVector in your Llama Stack project, follow these steps:
 2. Configure your Llama Stack project to use pgvector. (e.g. remote::pgvector).
 3. Start storing and querying vectors.
 
+## Example: setting up your environment to use PGVector
+
+1. Export env vars:
+```bash
+export ENABLE_PGVECTOR=true
+export PGVECTOR_HOST=localhost
+export PGVECTOR_PORT=5432
+export PGVECTOR_DB=llamastack
+export PGVECTOR_USER=llamastack
+export PGVECTOR_PASSWORD=llamastack
+```
+
+2. Create DB:
+```bash
+psql -h localhost -U postgres -c "CREATE ROLE llamastack LOGIN PASSWORD 'llamastack';"
+psql -h localhost -U postgres -c "CREATE DATABASE llamastack OWNER llamastack;"
+psql -h localhost -U llamastack -d llamastack -c "CREATE EXTENSION IF NOT EXISTS vector;"
+```
+
 ## Installation
 
 You can install PGVector using docker:
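
As a concrete illustration of the vector-search SQL shown in this section, here is a minimal Python sketch using psycopg2. The connection values follow the env-var example above, and the table name `vector_store_xxx` is the placeholder from the schema sketch, so treat both as illustrative:

```python
import psycopg2

# Connection values mirror the env-var example above (illustrative).
conn = psycopg2.connect(
    host="localhost", port=5432, dbname="llamastack",
    user="llamastack", password="llamastack",
)

query_embedding = [0.1] * 768  # embedding of the user query (placeholder values)

with conn.cursor() as cur:
    # Cosine-distance nearest neighbors, as in the "Vector Search" example SQL.
    # pgvector accepts a bracketed list literal cast to ::vector.
    cur.execute(
        """
        SELECT document, embedding <=> %s::vector AS distance
        FROM vector_store_xxx
        ORDER BY distance
        LIMIT 5
        """,
        (str(query_embedding),),
    )
    for document, distance in cur.fetchall():
        print(distance, document)
```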

@@ -17,6 +17,7 @@ Weaviate supports:
 - Metadata filtering
 - Multi-modal retrieval
 
+
 ## Usage
 
 To use Weaviate in your Llama Stack project, follow these steps:

@@ -478,7 +478,6 @@ llama-stack-client scoring_functions list
 ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓
 ┃ identifier ┃ provider_id ┃ description ┃ type ┃
 ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩
-│ basic::bfcl │ basic │ BFCL complex scoring │ scoring_function │
 │ basic::docvqa │ basic │ DocVQA Visual Question & Answer scoring function │ scoring_function │
 │ basic::equality │ basic │ Returns 1.0 if the input is equal to the target, 0.0 │ scoring_function │
 │ │ │ otherwise. │ │

@@ -5,10 +5,10 @@
 # the root directory of this source tree.
 
 from enum import StrEnum
-from typing import Annotated, Literal, Protocol, runtime_checkable
+from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable
 
 from fastapi import File, Form, Response, UploadFile
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 from llama_stack.apis.common.responses import Order
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
@@ -49,6 +49,23 @@ class OpenAIFileObject(BaseModel):
     purpose: OpenAIFilePurpose
 
 
+@json_schema_type
+class ExpiresAfter(BaseModel):
+    """
+    Control expiration of uploaded files.
+
+    Params:
+     - anchor, must be "created_at"
+     - seconds, must be int between 3600 and 2592000 (1 hour to 30 days)
+    """
+
+    MIN: ClassVar[int] = 3600  # 1 hour
+    MAX: ClassVar[int] = 2592000  # 30 days
+
+    anchor: Literal["created_at"]
+    seconds: int = Field(..., ge=3600, le=2592000)
+
+
 @json_schema_type
 class ListOpenAIFileResponse(BaseModel):
     """
@@ -92,6 +109,9 @@ class Files(Protocol):
         self,
         file: Annotated[UploadFile, File()],
         purpose: Annotated[OpenAIFilePurpose, Form()],
+        expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None,
+        expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None,
+        # TODO: expires_after is producing strange openapi spec, params are showing up as a required w/ oneOf being null
     ) -> OpenAIFileObject:
         """
         Upload a file that can be used across various endpoints.
@@ -99,6 +119,7 @@ class Files(Protocol):
         The file upload should be a multipart form request with:
         - file: The File object (not file name) to be uploaded.
         - purpose: The intended purpose of the uploaded file.
+        - expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] = <int>. Seconds must be between 3600 and 2592000 (1 hour to 30 days).
 
         :param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
         :param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").
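
Client-side, the new expiration fields translate into bracketed multipart form keys. A hedged sketch using `requests` follows; the base URL and port assume a locally running stack server, and the bracketed keys mirror the `Form(alias=...)` declarations above:

```python
import requests

# Illustrative upload against the OpenAI-compatible files endpoint.
resp = requests.post(
    "http://localhost:8321/v1/openai/v1/files",  # assumed local server address
    files={"file": ("notes.txt", b"hello world")},
    data={
        "purpose": "assistants",
        # Bracketed form keys match the Form(alias=...) declarations above.
        "expires_after[anchor]": "created_at",
        "expires_after[seconds]": 86400,  # must be within [3600, 2592000]
    },
)
resp.raise_for_status()
print(resp.json()["id"])
```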

@@ -284,7 +284,15 @@ async def instantiate_providers(
         if provider.provider_id is None:
             continue
 
-        deps = {a: impls[a] for a in provider.spec.api_dependencies}
+        try:
+            deps = {a: impls[a] for a in provider.spec.api_dependencies}
+        except KeyError as e:
+            missing_api = e.args[0]
+            raise RuntimeError(
+                f"Failed to resolve '{provider.spec.api.value}' provider '{provider.provider_id}' of type '{provider.spec.provider_type}': "
+                f"required dependency '{missing_api.value}' is not available. "
+                f"Please add a '{missing_api.value}' provider to your configuration or check if the provider is properly configured."
+            ) from e
         for a in provider.spec.optional_api_dependencies:
             if a in impls:
                 deps[a] = impls[a]

@@ -527,7 +527,7 @@ class InferenceRouter(Inference):
 
         # Store the response with the ID that will be returned to the client
         if self.store:
-            await self.store.store_chat_completion(response, messages)
+            asyncio.create_task(self.store.store_chat_completion(response, messages))
 
         if self.telemetry:
             metrics = self._construct_metrics(
@@ -755,7 +755,7 @@ class InferenceRouter(Inference):
                     choices_data[idx] = {
                         "content_parts": [],
                         "tool_calls_builder": {},
-                        "finish_reason": None,
+                        "finish_reason": "stop",
                         "logprobs_content_parts": [],
                     }
                 current_choice_data = choices_data[idx]
@@ -855,4 +855,4 @@ class InferenceRouter(Inference):
             object="chat.completion",
         )
         logger.debug(f"InferenceRouter.completion_response: {final_response}")
-        await self.store.store_chat_completion(final_response, messages)
+        asyncio.create_task(self.store.store_chat_completion(final_response, messages))
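
Switching from `await` to `asyncio.create_task` takes the store write off the response path. One caveat with bare `create_task` (not addressed in this diff) is that the event loop holds only a weak reference to tasks, so an unreferenced task can be garbage-collected before it finishes. The usual mitigation looks like this illustrative sketch:

```python
import asyncio
from collections.abc import Coroutine

# Keep strong references to in-flight background tasks so they are not
# garbage-collected mid-flight (a well-known create_task pitfall).
_background_tasks: set[asyncio.Task] = set()


def fire_and_forget(coro: Coroutine) -> None:
    task = asyncio.create_task(coro)
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
```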

@@ -52,7 +52,6 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
         provider_vector_db_id: str | None = None,
         vector_db_name: str | None = None,
     ) -> VectorDB:
-        provider_vector_db_id = provider_vector_db_id or vector_db_id
         if provider_id is None:
             if len(self.impls_by_provider_id) > 0:
                 provider_id = list(self.impls_by_provider_id.keys())[0]
@@ -69,14 +68,33 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
             raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
         if "embedding_dimension" not in model.metadata:
             raise ValueError(f"Model {embedding_model} does not have an embedding dimension")
+
+        provider = self.impls_by_provider_id[provider_id]
+        logger.warning(
+            "VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly."
+        )
+        vector_store = await provider.openai_create_vector_store(
+            name=vector_db_name or vector_db_id,
+            embedding_model=embedding_model,
+            embedding_dimension=model.metadata["embedding_dimension"],
+            provider_id=provider_id,
+            provider_vector_db_id=provider_vector_db_id,
+        )
+
+        vector_store_id = vector_store.id
+        actual_provider_vector_db_id = provider_vector_db_id or vector_store_id
+        logger.warning(
+            f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. Setting VectorDB {vector_db_id} to VectorDB.vector_db_name"
+        )
+
         vector_db_data = {
-            "identifier": vector_db_id,
+            "identifier": vector_store_id,
             "type": ResourceType.vector_db.value,
             "provider_id": provider_id,
-            "provider_resource_id": provider_vector_db_id,
+            "provider_resource_id": actual_provider_vector_db_id,
             "embedding_model": embedding_model,
             "embedding_dimension": model.metadata["embedding_dimension"],
-            "vector_db_name": vector_db_name,
+            "vector_db_name": vector_store.name,
         }
         vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data)
         await self.register_object(vector_db)

@@ -132,15 +132,17 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro
            },
        )
    elif isinstance(exc, ConflictError):
-        return HTTPException(status_code=409, detail=str(exc))
+        return HTTPException(status_code=httpx.codes.CONFLICT, detail=str(exc))
    elif isinstance(exc, ResourceNotFoundError):
-        return HTTPException(status_code=404, detail=str(exc))
+        return HTTPException(status_code=httpx.codes.NOT_FOUND, detail=str(exc))
    elif isinstance(exc, ValueError):
        return HTTPException(status_code=httpx.codes.BAD_REQUEST, detail=f"Invalid value: {str(exc)}")
    elif isinstance(exc, BadRequestError):
        return HTTPException(status_code=httpx.codes.BAD_REQUEST, detail=str(exc))
    elif isinstance(exc, PermissionError | AccessDeniedError):
        return HTTPException(status_code=httpx.codes.FORBIDDEN, detail=f"Permission denied: {str(exc)}")
+    elif isinstance(exc, ConnectionError | httpx.ConnectError):
+        return HTTPException(status_code=httpx.codes.BAD_GATEWAY, detail=str(exc))
    elif isinstance(exc, asyncio.TimeoutError | TimeoutError):
        return HTTPException(status_code=httpx.codes.GATEWAY_TIMEOUT, detail=f"Operation timed out: {str(exc)}")
    elif isinstance(exc, NotImplementedError):
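
The numeric status values are unchanged by the move to `httpx.codes`; the enum members compare equal to the integers they replace, as this quick check illustrates:

```python
import httpx

# httpx.codes is an IntEnum, so these compare equal to the bare literals
# previously hard-coded in translate_exception.
assert httpx.codes.CONFLICT == 409
assert httpx.codes.NOT_FOUND == 404
assert httpx.codes.BAD_GATEWAY == 502
assert httpx.codes.GATEWAY_TIMEOUT == 504
```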

@@ -105,12 +105,12 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
 
         method = getattr(impls[api], register_method)
         for obj in objects:
-            logger.debug(f"registering {rsrc.capitalize()} {obj} for provider {obj.provider_id}")
-
-            # Do not register models on disabled providers
-            if hasattr(obj, "provider_id") and (not obj.provider_id or obj.provider_id == "__disabled__"):
-                logger.debug(f"Skipping {rsrc.capitalize()} registration for disabled provider.")
-                continue
+            if hasattr(obj, "provider_id"):
+                # Do not register models on disabled providers
+                if not obj.provider_id or obj.provider_id == "__disabled__":
+                    logger.debug(f"Skipping {rsrc.capitalize()} registration for disabled provider.")
+                    continue
+                logger.debug(f"registering {rsrc.capitalize()} {obj} for provider {obj.provider_id}")
 
             # we want to maintain the type information in arguments to method.
             # instead of method(**obj.model_dump()), which may convert a typed attr to a dict,

@@ -11,9 +11,7 @@ from ..starter.starter import get_distribution_template as get_starter_distribut
 
 
 def get_distribution_template() -> DistributionTemplate:
-    template = get_starter_distribution_template()
-    name = "ci-tests"
-    template.name = name
+    template = get_starter_distribution_template(name="ci-tests")
     template.description = "CI tests for Llama Stack"
 
     return template

@@ -89,28 +89,28 @@ providers:
     config:
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db
   - provider_id: sqlite-vec
     provider_type: inline::sqlite-vec
     config:
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec_registry.db
   - provider_id: ${env.MILVUS_URL:+milvus}
     provider_type: inline::milvus
     config:
-      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/milvus_registry.db
   - provider_id: ${env.CHROMADB_URL:+chromadb}
     provider_type: remote::chromadb
     config:
       url: ${env.CHROMADB_URL:=}
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests/}/chroma_remote_registry.db
   - provider_id: ${env.PGVECTOR_DB:+pgvector}
     provider_type: remote::pgvector
     config:
@@ -121,15 +121,15 @@ providers:
       password: ${env.PGVECTOR_PASSWORD:=}
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/pgvector_registry.db
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
     config:
-      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files}
       metadata_store:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/files_metadata.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -134,6 +134,11 @@ models:
   provider_id: nvidia
   provider_model_id: meta/llama-3.3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: nvidia/vila
+  provider_id: nvidia
+  provider_model_id: nvidia/vila
+  model_type: llm
 - metadata:
     embedding_dimension: 2048
     context_length: 8192

@@ -43,7 +43,7 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
         "openai",
         [
             ProviderModelEntry(
-                provider_model_id="openai/gpt-4o",
+                provider_model_id="gpt-4o",
                 model_type=ModelType.llm,
             )
         ],
@@ -53,7 +53,7 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
         "anthropic",
         [
             ProviderModelEntry(
-                provider_model_id="anthropic/claude-3-5-sonnet-latest",
+                provider_model_id="claude-3-5-sonnet-latest",
                 model_type=ModelType.llm,
             )
         ],
@@ -206,13 +206,6 @@ def get_distribution_template() -> DistributionTemplate:
                 uri="huggingface://datasets/llamastack/math_500?split=test",
             ),
         ),
-        DatasetInput(
-            dataset_id="bfcl",
-            purpose=DatasetPurpose.eval_messages_answer,
-            source=URIDataSource(
-                uri="huggingface://datasets/llamastack/bfcl_v3?split=train",
-            ),
-        ),
         DatasetInput(
             dataset_id="ifeval",
             purpose=DatasetPurpose.eval_messages_answer,
@ -250,11 +243,6 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
dataset_id="math_500",
|
dataset_id="math_500",
|
||||||
scoring_functions=["basic::regex_parser_math_response"],
|
scoring_functions=["basic::regex_parser_math_response"],
|
||||||
),
|
),
|
||||||
BenchmarkInput(
|
|
||||||
benchmark_id="meta-reference-bfcl",
|
|
||||||
dataset_id="bfcl",
|
|
||||||
scoring_functions=["basic::bfcl"],
|
|
||||||
),
|
|
||||||
BenchmarkInput(
|
BenchmarkInput(
|
||||||
benchmark_id="meta-reference-ifeval",
|
benchmark_id="meta-reference-ifeval",
|
||||||
dataset_id="ifeval",
|
dataset_id="ifeval",
|
||||||
|
|
|
@@ -136,14 +136,14 @@ inference_store:
   db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/inference_store.db
 models:
 - metadata: {}
-  model_id: openai/gpt-4o
+  model_id: gpt-4o
   provider_id: openai
-  provider_model_id: openai/gpt-4o
+  provider_model_id: gpt-4o
   model_type: llm
 - metadata: {}
-  model_id: anthropic/claude-3-5-sonnet-latest
+  model_id: claude-3-5-sonnet-latest
   provider_id: anthropic
-  provider_model_id: anthropic/claude-3-5-sonnet-latest
+  provider_model_id: claude-3-5-sonnet-latest
   model_type: llm
 - metadata: {}
   model_id: gemini/gemini-1.5-flash
@@ -188,12 +188,6 @@ datasets:
     uri: huggingface://datasets/llamastack/math_500?split=test
   metadata: {}
   dataset_id: math_500
-- purpose: eval/messages-answer
-  source:
-    type: uri
-    uri: huggingface://datasets/llamastack/bfcl_v3?split=train
-  metadata: {}
-  dataset_id: bfcl
 - purpose: eval/messages-answer
   source:
     type: uri
@@ -228,11 +222,6 @@ benchmarks:
   - basic::regex_parser_math_response
   metadata: {}
   benchmark_id: meta-reference-math-500
-- dataset_id: bfcl
-  scoring_functions:
-  - basic::bfcl
-  metadata: {}
-  benchmark_id: meta-reference-bfcl
 - dataset_id: ifeval
   scoring_functions:
   - basic::ifeval
@@ -89,28 +89,28 @@ providers:
     config:
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/faiss_store.db
   - provider_id: sqlite-vec
     provider_type: inline::sqlite-vec
     config:
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec_registry.db
   - provider_id: ${env.MILVUS_URL:+milvus}
     provider_type: inline::milvus
     config:
-      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/milvus_registry.db
   - provider_id: ${env.CHROMADB_URL:+chromadb}
     provider_type: remote::chromadb
     config:
       url: ${env.CHROMADB_URL:=}
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu/}/chroma_remote_registry.db
   - provider_id: ${env.PGVECTOR_DB:+pgvector}
     provider_type: remote::pgvector
     config:
@@ -121,15 +121,15 @@ providers:
       password: ${env.PGVECTOR_PASSWORD:=}
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/pgvector_registry.db
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
     config:
-      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
       metadata_store:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/files_metadata.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -11,9 +11,7 @@ from ..starter.starter import get_distribution_template as get_starter_distribut
 
 
 def get_distribution_template() -> DistributionTemplate:
-    template = get_starter_distribution_template()
-    name = "starter-gpu"
-    template.name = name
+    template = get_starter_distribution_template(name="starter-gpu")
     template.description = "Quick start template for running Llama Stack with several popular providers. This distribution is intended for GPU-enabled environments."
 
     template.providers["post_training"] = [
@@ -99,9 +99,8 @@ def get_remote_inference_providers() -> list[Provider]:
     return inference_providers
 
 
-def get_distribution_template() -> DistributionTemplate:
+def get_distribution_template(name: str = "starter") -> DistributionTemplate:
     remote_inference_providers = get_remote_inference_providers()
-    name = "starter"
 
     providers = {
         "inference": [BuildProvider(provider_type=p.provider_type, module=p.module) for p in remote_inference_providers]
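Together, these two hunks replace post-construction mutation with a constructor parameter, so every name-derived artifact (run.yaml db_paths, storage dirs) is rendered consistently for the derived distribution. A sketch of the reuse pattern this enables, mirroring the names in the diff:

    # Sketch only: a derived distribution passes its own name up front
    # instead of patching template.name afterwards.
    def get_starter_gpu_template():
        template = get_distribution_template(name="starter-gpu")
        template.description = "GPU-enabled variant of the starter distribution."
        return template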
@@ -178,9 +178,9 @@ class ReferenceBatchesImpl(Batches):
 
         # TODO: set expiration time for garbage collection
 
-        if endpoint not in ["/v1/chat/completions"]:
+        if endpoint not in ["/v1/chat/completions", "/v1/completions"]:
             raise ValueError(
-                f"Invalid endpoint: {endpoint}. Supported values: /v1/chat/completions. Code: invalid_value. Param: endpoint",
+                f"Invalid endpoint: {endpoint}. Supported values: /v1/chat/completions, /v1/completions. Code: invalid_value. Param: endpoint",
             )
 
         if completion_window != "24h":
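With this change the reference batches provider accepts plain completions as well as chat completions. A hedged sketch of a batch input file: the field names (custom_id, method, url, body) follow the OpenAI-style batch line format that the provider's request objects expose further down, and the model name is a placeholder:

    import json

    requests = [
        {
            "custom_id": "req-1",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {"model": "llama3.2:3b", "messages": [{"role": "user", "content": "Hi"}]},
        },
        {
            "custom_id": "req-2",
            "method": "POST",
            "url": "/v1/completions",
            "body": {"model": "llama3.2:3b", "prompt": "Once upon a time"},
        },
    ]
    with open("batch_input.jsonl", "w") as f:
        for r in requests:
            f.write(json.dumps(r) + "\n")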
@@ -424,13 +424,21 @@ class ReferenceBatchesImpl(Batches):
                     )
                     valid = False
 
-                for param, expected_type, type_string in [
-                    ("model", str, "a string"),
-                    # messages is specific to /v1/chat/completions
-                    # we could skip validating messages here and let inference fail. however,
-                    # that would be a very expensive way to find out messages is wrong.
-                    ("messages", list, "an array"),  # TODO: allow messages to be a string?
-                ]:
+                if batch.endpoint == "/v1/chat/completions":
+                    required_params = [
+                        ("model", str, "a string"),
+                        # messages is specific to /v1/chat/completions
+                        # we could skip validating messages here and let inference fail. however,
+                        # that would be a very expensive way to find out messages is wrong.
+                        ("messages", list, "an array"),  # TODO: allow messages to be a string?
+                    ]
+                else:  # /v1/completions
+                    required_params = [
+                        ("model", str, "a string"),
+                        ("prompt", str, "a string"),  # TODO: allow prompt to be a list of strings??
+                    ]
+
+                for param, expected_type, type_string in required_params:
                     if param not in body:
                         errors.append(
                             BatchError(
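For reference, the validation introduced above expressed as data rather than provider code (a sketch; the tuples mirror the diff):

    REQUIRED_PARAMS = {
        "/v1/chat/completions": [("model", str, "a string"), ("messages", list, "an array")],
        "/v1/completions": [("model", str, "a string"), ("prompt", str, "a string")],
    }

    def missing_params(endpoint: str, body: dict) -> list[str]:
        return [name for name, _, _ in REQUIRED_PARAMS[endpoint] if name not in body]

    assert missing_params("/v1/completions", {"model": "m"}) == ["prompt"]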
@@ -591,20 +599,37 @@ class ReferenceBatchesImpl(Batches):
 
         try:
             # TODO(SECURITY): review body for security issues
-            request.body["messages"] = [convert_to_openai_message_param(msg) for msg in request.body["messages"]]
-            chat_response = await self.inference_api.openai_chat_completion(**request.body)
-
-            # this is for mypy, we don't allow streaming so we'll get the right type
-            assert hasattr(chat_response, "model_dump_json"), "Chat response must have model_dump_json method"
-            return {
-                "id": request_id,
-                "custom_id": request.custom_id,
-                "response": {
-                    "status_code": 200,
-                    "request_id": request_id,  # TODO: should this be different?
-                    "body": chat_response.model_dump_json(),
-                },
-            }
+            if request.url == "/v1/chat/completions":
+                request.body["messages"] = [convert_to_openai_message_param(msg) for msg in request.body["messages"]]
+                chat_response = await self.inference_api.openai_chat_completion(**request.body)
+
+                # this is for mypy, we don't allow streaming so we'll get the right type
+                assert hasattr(chat_response, "model_dump_json"), "Chat response must have model_dump_json method"
+                return {
+                    "id": request_id,
+                    "custom_id": request.custom_id,
+                    "response": {
+                        "status_code": 200,
+                        "request_id": request_id,  # TODO: should this be different?
+                        "body": chat_response.model_dump_json(),
+                    },
+                }
+            else:  # /v1/completions
+                completion_response = await self.inference_api.openai_completion(**request.body)
+
+                # this is for mypy, we don't allow streaming so we'll get the right type
+                assert hasattr(completion_response, "model_dump_json"), (
+                    "Completion response must have model_dump_json method"
+                )
+                return {
+                    "id": request_id,
+                    "custom_id": request.custom_id,
+                    "response": {
+                        "status_code": 200,
+                        "request_id": request_id,
+                        "body": completion_response.model_dump_json(),
+                    },
+                }
         except Exception as e:
             logger.info(f"Error processing request {request.custom_id} in batch {batch_id}: {e}")
             return {
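One consequence of the branch above worth noting for consumers: each per-request result embeds the inference response via model_dump_json(), so the "body" field of an output line is itself a JSON string and needs a second parse. A hedged sketch of reading such output (the file name is a placeholder):

    import json

    with open("batch_output.jsonl") as f:
        for line in f:
            record = json.loads(line)
            if record["response"]["status_code"] == 200:
                body = json.loads(record["response"]["body"])  # nested JSON string
                print(record["custom_id"], body.get("choices", [{}])[0])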
@@ -86,11 +86,16 @@ class LocalfsFilesImpl(Files):
         self,
         file: Annotated[UploadFile, File()],
         purpose: Annotated[OpenAIFilePurpose, Form()],
+        expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None,
+        expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None,
     ) -> OpenAIFileObject:
         """Upload a file that can be used across various endpoints."""
         if not self.sql_store:
             raise RuntimeError("Files provider not initialized")
 
+        if expires_after_anchor is not None or expires_after_seconds is not None:
+            raise NotImplementedError("File expiration is not supported by this provider")
+
         file_id = self._generate_file_id()
         file_path = self._get_file_path(file_id)
 
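The new form fields mirror OpenAI's bracketed multipart keys, so uploads that request expiration now fail fast instead of silently ignoring the fields. A hedged raw-HTTP sketch (the route prefix and anchor value are assumptions, not confirmed by this diff):

    import httpx

    resp = httpx.post(
        "http://localhost:8321/v1/openai/v1/files",  # hypothetical route prefix
        files={"file": ("notes.txt", b"hello")},
        data={
            "purpose": "assistants",
            "expires_after[anchor]": "created_at",
            "expires_after[seconds]": "3600",
        },
    )
    print(resp.status_code)  # expect an error: expiration is not implemented here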
@@ -22,7 +22,6 @@ from llama_stack.providers.utils.common.data_schema_validator import (
 )
 
 from .config import BasicScoringConfig
-from .scoring_fn.bfcl_scoring_fn import BFCLScoringFn
 from .scoring_fn.docvqa_scoring_fn import DocVQAScoringFn
 from .scoring_fn.equality_scoring_fn import EqualityScoringFn
 from .scoring_fn.ifeval_scoring_fn import IfEvalScoringFn
@@ -37,7 +36,6 @@ FIXED_FNS = [
     SubsetOfScoringFn,
     RegexParserScoringFn,
     RegexParserMathResponseScoringFn,
-    BFCLScoringFn,
     IfEvalScoringFn,
     DocVQAScoringFn,
 ]
@@ -1,93 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import json
-import re
-from typing import Any
-
-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams
-from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
-
-from ..utils.bfcl.ast_parser import decode_ast
-from ..utils.bfcl.checker import ast_checker, is_empty_output
-from .fn_defs.bfcl import bfcl
-
-
-def postprocess(x: dict[str, Any], test_category: str) -> dict[str, Any]:
-    contain_func_call = False
-    error = None
-    error_type = None
-    checker_result = {}
-    try:
-        prediction = decode_ast(x["generated_answer"], x["language"]) or ""
-        contain_func_call = True
-        # if not is_function_calling_format_output(prediction):
-        if is_empty_output(prediction):
-            contain_func_call = False
-            error = "Did not output in the specified format. Note: the model_result is wrapped in a string to ensure json serializability."
-            error_type = "ast_decoder:decoder_wrong_output_format"
-        else:
-            checker_result = ast_checker(
-                json.loads(x["function"]),
-                prediction,
-                json.loads(x["ground_truth"]),
-                x["language"],
-                test_category=test_category,
-                model_name="",
-            )
-    except Exception as e:
-        prediction = ""
-        error = f"Invalid syntax. Failed to decode AST. {str(e)}"
-        error_type = "ast_decoder:decoder_failed"
-    return {
-        "prediction": prediction,
-        "contain_func_call": contain_func_call,
-        "valid": checker_result.get("valid", False),
-        "error": error or checker_result.get("error", ""),
-        "error_type": error_type or checker_result.get("error_type", ""),
-    }
-
-
-def gen_valid(x: dict[str, Any]) -> dict[str, float]:
-    return {"valid": x["valid"]}
-
-
-def gen_relevance_acc(x: dict[str, Any]) -> dict[str, float]:
-    # This function serves for both relevance and irrelevance tests, which share the exact opposite logic.
-    # If `test_category` is "irrelevance", the model is expected to output no function call.
-    # No function call means either the AST decoding fails (a error message is generated) or the decoded AST does not contain any function call (such as a empty list, `[]`).
-    # If `test_category` is "relevance", the model is expected to output to a function call, and empty list doesn't count as a function call.
-    acc = not x["contain_func_call"] if "irrelevance" in x["id"] else x["contain_func_call"]
-    return {"valid": float(acc)}
-
-
-class BFCLScoringFn(RegisteredBaseScoringFn):
-    """
-    A scoring_fn for BFCL
-    """
-
-    def __init__(self, *args, **kwargs) -> None:
-        super().__init__(*args, **kwargs)
-        self.supported_fn_defs_registry = {
-            bfcl.identifier: bfcl,
-        }
-
-    async def score_row(
-        self,
-        input_row: dict[str, Any],
-        scoring_fn_identifier: str | None = "bfcl",
-        scoring_params: ScoringFnParams | None = None,
-    ) -> ScoringResultRow:
-        test_category = re.sub(r"_[0-9_-]+$", "", input_row["id"])
-        score_result = postprocess(input_row, test_category)
-        if test_category in {"irrelevance", "live_relevance", "live_irrelevance"}:
-            score = gen_relevance_acc(score_result)["valid"]
-        else:
-            score = gen_valid(score_result)["valid"]
-        return {
-            "score": float(score),
-        }
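For the record, the category routing in the removed scorer hinged on stripping a trailing numeric suffix from the row id; a small worked example of that regex:

    import re

    for row_id in ["simple_123", "irrelevance_7", "live_irrelevance_42-1"]:
        print(row_id, "->", re.sub(r"_[0-9_-]+$", "", row_id))
    # simple_123 -> simple
    # irrelevance_7 -> irrelevance
    # live_irrelevance_42-1 -> live_irrelevance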
@@ -1,21 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
-    AggregationFunctionType,
-    BasicScoringFnParams,
-    ScoringFn,
-)
-
-bfcl = ScoringFn(
-    identifier="basic::bfcl",
-    description="BFCL complex scoring",
-    return_type=NumberType(),
-    provider_id="basic",
-    provider_resource_id="bfcl",
-    params=BasicScoringFnParams(aggregation_functions=[AggregationFunctionType.accuracy]),
-)
@@ -1,296 +0,0 @@
-# ruff: noqa
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-import ast
-
-from .tree_sitter import get_parser
-
-
-def parse_java_function_call(source_code):
-    if not source_code.endswith(";"):
-        source_code += ";"  # Necessary for the parser not to register an error
-    parser = get_parser("java")
-    tree = parser.parse(bytes(source_code, "utf8"))
-    root_node = tree.root_node
-
-    if root_node.has_error:
-        raise Exception("Error parsing java the source code.")
-
-    def get_text(node):
-        """Returns the text represented by the node."""
-        return source_code[node.start_byte : node.end_byte]
-
-    def traverse_node(node, nested=False):
-        if node.type == "string_literal":
-            if nested:
-                return get_text(node)
-            # Strip surrounding quotes from string literals
-            return get_text(node)[1:-1]
-        elif node.type == "character_literal":
-            if nested:
-                return get_text(node)
-            # Strip surrounding single quotes from character literals
-            return get_text(node)[1:-1]
-        """Traverse the node to collect texts for complex structures."""
-        if node.type in [
-            "identifier",
-            "class_literal",
-            "type_identifier",
-            "method_invocation",
-        ]:
-            return get_text(node)
-        elif node.type == "array_creation_expression":
-            # Handle array creation expression specifically
-            type_node = node.child_by_field_name("type")
-            value_node = node.child_by_field_name("value")
-            type_text = traverse_node(type_node, True)
-            value_text = traverse_node(value_node, True)
-            return f"new {type_text}[]{value_text}"
-        elif node.type == "object_creation_expression":
-            # Handle object creation expression specifically
-            type_node = node.child_by_field_name("type")
-            arguments_node = node.child_by_field_name("arguments")
-            type_text = traverse_node(type_node, True)
-            if arguments_node:
-                # Process each argument carefully, avoiding unnecessary punctuation
-                argument_texts = []
-                for child in arguments_node.children:
-                    if child.type not in [
-                        ",",
-                        "(",
-                        ")",
-                    ]:  # Exclude commas and parentheses
-                        argument_text = traverse_node(child, True)
-                        argument_texts.append(argument_text)
-                arguments_text = ", ".join(argument_texts)
-                return f"new {type_text}({arguments_text})"
-            else:
-                return f"new {type_text}()"
-        elif node.type == "set":
-            # Handling sets specifically
-            items = [traverse_node(n, True) for n in node.children if n.type not in [",", "set"]]
-            return "{" + ", ".join(items) + "}"
-
-        elif node.child_count > 0:
-            return "".join(traverse_node(child, True) for child in node.children)
-        else:
-            return get_text(node)
-
-    def extract_arguments(args_node):
-        arguments = {}
-        for child in args_node.children:
-            if child.type == "assignment_expression":
-                # For named parameters
-                name_node, value_node = child.children[0], child.children[2]
-                name = get_text(name_node)
-                value = traverse_node(value_node)
-                if name in arguments:
-                    if not isinstance(arguments[name], list):
-                        arguments[name] = [arguments[name]]
-                    arguments[name].append(value)
-                else:
-                    arguments[name] = value
-                # arguments.append({'name': name, 'value': value})
-            elif child.type in ["identifier", "class_literal", "set"]:
-                # For unnamed parameters and handling sets
-                value = traverse_node(child)
-                if None in arguments:
-                    if not isinstance(arguments[None], list):
-                        arguments[None] = [arguments[None]]
-                    arguments[None].append(value)
-                else:
-                    arguments[None] = value
-        return arguments
-
-    def traverse(node):
-        if node.type == "method_invocation":
-            # Extract the function name and its arguments
-            method_name = get_text(node.child_by_field_name("name"))
-            class_name_node = node.child_by_field_name("object")
-            if class_name_node:
-                class_name = get_text(class_name_node)
-                function_name = f"{class_name}.{method_name}"
-            else:
-                function_name = method_name
-            arguments_node = node.child_by_field_name("arguments")
-            if arguments_node:
-                arguments = extract_arguments(arguments_node)
-                for key, value in arguments.items():
-                    if isinstance(value, list):
-                        raise Exception("Error: Multiple arguments with the same name are not supported.")
-                return [{function_name: arguments}]
-
-        else:
-            for child in node.children:
-                result = traverse(child)
-                if result:
-                    return result
-
-    result = traverse(root_node)
-    return result if result else {}
-
-
-def parse_javascript_function_call(source_code):
-    if not source_code.endswith(";"):
-        source_code += ";"  # Necessary for the parser not to register an error
-    parser = get_parser("javascript")
-    # Parse the source code
-    tree = parser.parse(bytes(source_code, "utf8"))
-    root_node = tree.root_node
-    if root_node.has_error:
-        raise Exception("Error js parsing the source code.")
-
-    # Function to recursively extract argument details
-    def extract_arguments(node):
-        args = {}
-        for child in node.children:
-            if child.type == "assignment_expression":
-                # Extract left (name) and right (value) parts of the assignment
-                name = child.children[0].text.decode("utf-8")
-                value = child.children[2].text.decode("utf-8")
-                if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
-                    value = value[1:-1]  # Trim the quotation marks
-                if name in args:
-                    if not isinstance(args[name], list):
-                        args[name] = [args[name]]
-                    args[name].append(value)
-                else:
-                    args[name] = value
-
-            elif child.type == "identifier" or child.type == "true":
-                # Handle non-named arguments and boolean values
-                value = child.text.decode("utf-8")
-                if None in args:
-                    if not isinstance(args[None], list):
-                        args[None] = [args[None]]
-                    args[None].append(value)
-                else:
-                    args[None] = value
-        return args
-
-    # Find the function call and extract its name and arguments
-    if root_node.type == "program":
-        for child in root_node.children:
-            if child.type == "expression_statement":
-                for sub_child in child.children:
-                    if sub_child.type == "call_expression":
-                        function_name = sub_child.children[0].text.decode("utf8")
-                        arguments_node = sub_child.children[1]
-                        parameters = extract_arguments(arguments_node)
-                        for key, value in parameters.items():
-                            if isinstance(value, list):
-                                raise Exception("Error: Multiple arguments with the same name are not supported.")
-                        result = [{function_name: parameters}]
-                        return result
-
-
-def ast_parse(input_str, language="Python"):
-    if language == "Python":
-        cleaned_input = input_str.strip("[]'")
-        parsed = ast.parse(cleaned_input, mode="eval")
-        extracted = []
-        if isinstance(parsed.body, ast.Call):
-            extracted.append(resolve_ast_call(parsed.body))
-        else:
-            for elem in parsed.body.elts:
-                extracted.append(resolve_ast_call(elem))
-        return extracted
-    elif language == "Java":
-        return parse_java_function_call(input_str[1:-1])  # Remove the [ and ] from the string
-    elif language == "JavaScript":
-        return parse_javascript_function_call(input_str[1:-1])
-    else:
-        raise NotImplementedError(f"Unsupported language: {language}")
-
-
-def resolve_ast_call(elem):
-    # Handle nested attributes for deeply nested module paths
-    func_parts = []
-    func_part = elem.func
-    while isinstance(func_part, ast.Attribute):
-        func_parts.append(func_part.attr)
-        func_part = func_part.value
-    if isinstance(func_part, ast.Name):
-        func_parts.append(func_part.id)
-    func_name = ".".join(reversed(func_parts))
-    args_dict = {}
-    # Parse when args are simply passed as an unnamed dictionary arg
-    for arg in elem.args:
-        if isinstance(arg, ast.Dict):
-            for key, value in zip(arg.keys, arg.values):
-                if isinstance(key, ast.Constant):
-                    arg_name = key.value
-                output = resolve_ast_by_type(value)
-                args_dict[arg_name] = output
-    for arg in elem.keywords:
-        output = resolve_ast_by_type(arg.value)
-        args_dict[arg.arg] = output
-    return {func_name: args_dict}
-
-
-def resolve_ast_by_type(value):
-    if isinstance(value, ast.Constant):
-        if value.value is Ellipsis:
-            output = "..."
-        else:
-            output = value.value
-    elif isinstance(value, ast.UnaryOp):
-        output = -value.operand.value
-    elif isinstance(value, ast.List):
-        output = [resolve_ast_by_type(v) for v in value.elts]
-    elif isinstance(value, ast.Dict):
-        output = {resolve_ast_by_type(k): resolve_ast_by_type(v) for k, v in zip(value.keys, value.values)}
-    elif isinstance(value, ast.NameConstant):  # Added this condition to handle boolean values
-        output = value.value
-    elif isinstance(value, ast.BinOp):  # Added this condition to handle function calls as arguments
-        output = eval(ast.unparse(value))
-    elif isinstance(value, ast.Name):
-        output = value.id
-    elif isinstance(value, ast.Call):
-        if len(value.keywords) == 0:
-            output = ast.unparse(value)
-        else:
-            output = resolve_ast_call(value)
-    elif isinstance(value, ast.Tuple):
-        output = tuple(resolve_ast_by_type(v) for v in value.elts)
-    elif isinstance(value, ast.Lambda):
-        output = eval(ast.unparse(value.body[0].value))
-    elif isinstance(value, ast.Ellipsis):
-        output = "..."
-    elif isinstance(value, ast.Subscript):
-        try:
-            output = ast.unparse(value.body[0].value)
-        except:
-            output = ast.unparse(value.value) + "[" + ast.unparse(value.slice) + "]"
-    else:
-        raise Exception(f"Unsupported AST type: {type(value)}")
-    return output
-
-
-def decode_ast(result, language="Python"):
-    func = result
-    func = func.replace("\n", "")  # remove new line characters
-    if not func.startswith("["):
-        func = "[" + func
-    if not func.endswith("]"):
-        func = func + "]"
-    decoded_output = ast_parse(func, language)
-    return decoded_output
-
-
-def decode_execute(result):
-    func = result
-    func = func.replace("\n", "")  # remove new line characters
-    if not func.startswith("["):
-        func = "[" + func
-    if not func.endswith("]"):
-        func = func + "]"
-    decode_output = ast_parse(func)
-    execution_list = []
-    for function_call in decode_output:
-        for key, value in function_call.items():
-            execution_list.append(f"{key}({','.join([f'{k}={repr(v)}' for k, v in value.items()])})")
-    return execution_list
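For the record, the removed decoder lowered model output to {function_name: {arg: value}} mappings; a worked example of its Python path (runnable only against the removed module, shown here to document its behavior):

    decode_ast("calculate_triangle_area(base=10, height=5)")
    # -> [{'calculate_triangle_area': {'base': 10, 'height': 5}}]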
@ -1,989 +0,0 @@
|
||||||
# ruff: noqa
|
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
# Comment out for now until we actually use the rest checker in evals
|
|
||||||
# import requests # Do not remove this import even though it seems to be unused. It's used in the executable_checker_rest function.
|
|
||||||
|
|
||||||
|
|
||||||
class NoAPIKeyError(Exception):
|
|
||||||
def __init__(self):
|
|
||||||
self.message = "❗️Please fill in the API keys in the function_credential_config.json file. If you do not provide the API keys, the executable test category results will be inaccurate."
|
|
||||||
super().__init__(self.message)
|
|
||||||
|
|
||||||
|
|
||||||
REAL_TIME_MATCH_ALLOWED_DIFFERENCE = 0.2
|
|
||||||
|
|
||||||
|
|
||||||
JAVA_TYPE_CONVERSION = {
|
|
||||||
"byte": int,
|
|
||||||
"short": int,
|
|
||||||
"integer": int,
|
|
||||||
"float": float,
|
|
||||||
"double": float,
|
|
||||||
"long": int,
|
|
||||||
"boolean": bool,
|
|
||||||
"char": str,
|
|
||||||
"Array": list,
|
|
||||||
"ArrayList": list,
|
|
||||||
"Set": set,
|
|
||||||
"HashMap": dict,
|
|
||||||
"Hashtable": dict,
|
|
||||||
"Queue": list, # this can be `queue.Queue` as well, for simplicity we check with list
|
|
||||||
"Stack": list,
|
|
||||||
"String": str,
|
|
||||||
"any": str,
|
|
||||||
}
|
|
||||||
|
|
||||||
JS_TYPE_CONVERSION = {
|
|
||||||
"String": str,
|
|
||||||
"integer": int,
|
|
||||||
"float": float,
|
|
||||||
"Bigint": int,
|
|
||||||
"Boolean": bool,
|
|
||||||
"dict": dict,
|
|
||||||
"array": list,
|
|
||||||
"any": str,
|
|
||||||
}
|
|
||||||
|
|
||||||
# We switch to conditional import for the following two imports to avoid unnecessary installations.
|
|
||||||
# User doesn't need to setup the tree-sitter packages if they are not running the test for that language.
|
|
||||||
# from js_type_converter import js_type_converter
|
|
||||||
# from java_type_converter import java_type_converter
|
|
||||||
|
|
||||||
PYTHON_TYPE_MAPPING = {
|
|
||||||
"string": str,
|
|
||||||
"integer": int,
|
|
||||||
"float": float,
|
|
||||||
"boolean": bool,
|
|
||||||
"array": list,
|
|
||||||
"tuple": list,
|
|
||||||
"dict": dict,
|
|
||||||
"any": str,
|
|
||||||
}
|
|
||||||
|
|
||||||
# This is the list of types that we need to recursively check its values
|
|
||||||
PYTHON_NESTED_TYPE_CHECK_LIST = ["array", "tuple"]
|
|
||||||
|
|
||||||
|
|
||||||
NESTED_CONVERSION_TYPE_LIST = ["Array", "ArrayList", "array"]
|
|
||||||
|
|
||||||
|
|
||||||
#### Helper functions for AST ####
|
|
||||||
def find_description(func_descriptions, name):
|
|
||||||
if type(func_descriptions) == list:
|
|
||||||
for func_description in func_descriptions:
|
|
||||||
if func_description["name"] == name:
|
|
||||||
return func_description
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
# it is a dict, there is only one function
|
|
||||||
return func_descriptions
|
|
||||||
|
|
||||||
|
|
||||||
def get_possible_answer_type(possible_answer: list):
|
|
||||||
for answer in possible_answer:
|
|
||||||
if answer != "": # Optional parameter
|
|
||||||
return type(answer)
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def type_checker(
|
|
||||||
param: str,
|
|
||||||
value,
|
|
||||||
possible_answer: list,
|
|
||||||
expected_type_description: str,
|
|
||||||
expected_type_converted,
|
|
||||||
nested_type_converted,
|
|
||||||
):
|
|
||||||
# NOTE: This type checker only supports nested type checking for one level deep.
|
|
||||||
# We didn't implement recursive type checking for nested types, as it's not needed for the current use case and it's very complex.
|
|
||||||
|
|
||||||
result: Any = {
|
|
||||||
"valid": True,
|
|
||||||
"error": [],
|
|
||||||
"is_variable": False,
|
|
||||||
"error_type": "type_error:simple",
|
|
||||||
}
|
|
||||||
|
|
||||||
is_variable = False
|
|
||||||
# check for the case where a variable is used instead of a actual value.
|
|
||||||
# use the type in possible_answer as the expected type
|
|
||||||
possible_answer_type = get_possible_answer_type(possible_answer)
|
|
||||||
# if possible_answer only contains optional parameters, we can't determine the type
|
|
||||||
if possible_answer_type != None:
|
|
||||||
# we are being precise here.
|
|
||||||
# in fact, possible_answer_type should always be string, as that's how we treat varibale in possible_answer
|
|
||||||
if possible_answer_type != expected_type_converted:
|
|
||||||
is_variable = True
|
|
||||||
|
|
||||||
# value is the same type as in function description
|
|
||||||
if type(value) == expected_type_converted:
|
|
||||||
# We don't need to do recursive check for simple types
|
|
||||||
if nested_type_converted == None:
|
|
||||||
result["is_variable"] = is_variable
|
|
||||||
return result
|
|
||||||
else:
|
|
||||||
for possible_answer_item in possible_answer:
|
|
||||||
flag = True # Each parameter should match to at least one possible answer type.
|
|
||||||
# Here, we assume that each item should be the same type. We could also relax it.
|
|
||||||
if type(possible_answer_item) == list:
|
|
||||||
for value_item in value:
|
|
||||||
checker_result = type_checker(
|
|
||||||
param,
|
|
||||||
value_item,
|
|
||||||
possible_answer_item,
|
|
||||||
str(nested_type_converted),
|
|
||||||
nested_type_converted,
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
if not checker_result["valid"]:
|
|
||||||
flag = False
|
|
||||||
break
|
|
||||||
|
|
||||||
if flag:
|
|
||||||
return {"valid": True, "error": [], "is_variable": is_variable}
|
|
||||||
|
|
||||||
result["valid"] = False
|
|
||||||
result["error"] = [
|
|
||||||
f"Nested type checking failed for parameter {repr(param)}. Expected outer type {expected_type_description} with inner type {str(nested_type_converted)}. Parameter value: {repr(value)}."
|
|
||||||
]
|
|
||||||
result["error_type"] = "type_error:nested"
|
|
||||||
|
|
||||||
# value is not as expected, check for the case where a variable is used instead of a actual value
|
|
||||||
# use the type in possible_answer as the expected type
|
|
||||||
possible_answer_type = get_possible_answer_type(possible_answer)
|
|
||||||
# if possible_answer only contains optional parameters, we can't determine the type
|
|
||||||
if possible_answer_type != None:
|
|
||||||
# we are being precise here.
|
|
||||||
# in fact, possible_answer_type should always be string, as that's how we treat varibale in possible_answer
|
|
||||||
if type(value) == possible_answer_type:
|
|
||||||
result["is_variable"] = True
|
|
||||||
return result
|
|
||||||
|
|
||||||
result["valid"] = False
|
|
||||||
result["error"].append(
|
|
||||||
f"Incorrect type for parameter {repr(param)}. Expected type {expected_type_description}, got {type(value).__name__}. Parameter value: {repr(value)}."
|
|
||||||
)
|
|
||||||
result["error_type"] = "type_error:simple"
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def standardize_string(input_string: str):
|
|
||||||
# This function standardizes the string by removing all the spaces, ",./-_*^" punctuation, and converting it to lowercase
|
|
||||||
# It will also convert all the single quotes to double quotes
|
|
||||||
# This is used to compare the model output with the possible answers
|
|
||||||
# We don't want to punish model for answer like April 1, 2024 vs April 1,2024, vs April 1 2024
|
|
||||||
regex_string = r"[ \,\.\/\-\_\*\^]"
|
|
||||||
return re.sub(regex_string, "", input_string).lower().replace("'", '"')
|
|
||||||
|
|
||||||
|
|
||||||
def string_checker(param: str, model_output: str, possible_answer: list):
|
|
||||||
standardize_possible_answer = []
|
|
||||||
standardize_model_output = standardize_string(model_output)
|
|
||||||
for i in range(len(possible_answer)):
|
|
||||||
if type(possible_answer[i]) == str:
|
|
||||||
standardize_possible_answer.append(standardize_string(possible_answer[i]))
|
|
||||||
|
|
||||||
if standardize_model_output not in standardize_possible_answer:
|
|
||||||
return {
|
|
||||||
"valid": False,
|
|
||||||
"error": [
|
|
||||||
f"Invalid value for parameter {repr(param)}: {repr(model_output)}. Expected one of {possible_answer}. Case insensitive."
|
|
||||||
],
|
|
||||||
"error_type": "value_error:string",
|
|
||||||
}
|
|
||||||
|
|
||||||
return {"valid": True, "error": []}
|
|
||||||
|
|
||||||
|
|
||||||
def list_checker(param: str, model_output: list, possible_answer: list):
|
|
||||||
# Convert the tuple to a list
|
|
||||||
|
|
||||||
standardize_model_output = list(model_output)
|
|
||||||
|
|
||||||
# If the element in the list is a string, we need to standardize it
|
|
||||||
for i in range(len(standardize_model_output)):
|
|
||||||
if type(standardize_model_output[i]) == str:
|
|
||||||
standardize_model_output[i] = standardize_string(model_output[i])
|
|
||||||
|
|
||||||
standardize_possible_answer: Any = []
|
|
||||||
# We also need to standardize the possible answers
|
|
||||||
for i in range(len(possible_answer)):
|
|
||||||
standardize_possible_answer.append([])
|
|
||||||
for j in range(len(possible_answer[i])):
|
|
||||||
if type(possible_answer[i][j]) == str:
|
|
||||||
standardize_possible_answer[i].append(standardize_string(possible_answer[i][j]))
|
|
||||||
else:
|
|
||||||
standardize_possible_answer[i].append(possible_answer[i][j])
|
|
||||||
|
|
||||||
if standardize_model_output not in standardize_possible_answer:
|
|
||||||
return {
|
|
||||||
"valid": False,
|
|
||||||
"error": [
|
|
||||||
f"Invalid value for parameter {repr(param)}: {repr(model_output)}. Expected one of {possible_answer}."
|
|
||||||
],
|
|
||||||
"error_type": "value_error:list/tuple",
|
|
||||||
}
|
|
||||||
|
|
||||||
return {"valid": True, "error": []}
|
|
||||||
|
|
||||||
|
|
||||||
def dict_checker(param: str, model_output: dict, possible_answers: list):
|
|
||||||
# This function works for simple dictionaries, but not dictionaries with nested dictionaries.
|
|
||||||
# The current dataset only contains simple dictionaries, so this is sufficient.
|
|
||||||
|
|
||||||
result = {"valid": False, "error": [], "error_type": "dict_checker:unclear"}
|
|
||||||
for i in range(len(possible_answers)):
|
|
||||||
if possible_answers[i] == "":
|
|
||||||
continue
|
|
||||||
|
|
||||||
result = {"valid": False, "error": [], "error_type": "dict_checker:unclear"}
|
|
||||||
|
|
||||||
flag = True
|
|
||||||
|
|
||||||
possible_answer = possible_answers[i]
|
|
||||||
# possible_anwer is a single dictionary
|
|
||||||
|
|
||||||
for key, value in model_output.items():
|
|
||||||
if key not in possible_answer:
|
|
||||||
result["valid"] = False
|
|
||||||
result["error"].append(f"Unexpected dict key parameter: '{key}'.") # type: ignore[attr-defined]
|
|
||||||
result["error_type"] = "value_error:dict_key"
|
|
||||||
flag = False
|
|
||||||
break
|
|
||||||
|
|
||||||
standardize_value = value
|
|
||||||
# If the value is a string, we need to standardize it
|
|
||||||
if type(value) == str:
|
|
||||||
standardize_value = standardize_string(value)
|
|
||||||
|
|
||||||
# We also need to standardize the possible answers if they are string
|
|
||||||
standardize_possible_answer = []
|
|
||||||
for i in range(len(possible_answer[key])):
|
|
||||||
if type(possible_answer[key][i]) == str:
|
|
||||||
standardize_possible_answer.append(standardize_string(possible_answer[key][i]))
|
|
||||||
else:
|
|
||||||
standardize_possible_answer.append(possible_answer[key][i])
|
|
||||||
|
|
||||||
if standardize_value not in standardize_possible_answer:
|
|
||||||
result["valid"] = False
|
|
||||||
result["error"].append( # type: ignore[attr-defined]
|
|
||||||
f"Invalid value for parameter {repr(key)}: {repr(value)}. Expected one of {standardize_possible_answer}."
|
|
||||||
)
|
|
||||||
result["error_type"] = "value_error:dict_value"
|
|
||||||
flag = False
|
|
||||||
break
|
|
||||||
|
|
||||||
for key, value in possible_answer.items():
|
|
||||||
if key not in model_output and "" not in value:
|
|
||||||
result["valid"] = False
|
|
||||||
result["error"].append(f"Missing dict key parameter: '{key}'.") # type: ignore[attr-defined]
|
|
||||||
result["error_type"] = "value_error:dict_key"
|
|
||||||
flag = False
|
|
||||||
break
|
|
||||||
|
|
||||||
if flag:
|
|
||||||
return {"valid": True, "error": []}
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def list_dict_checker(param: str, model_output: list, possible_answers: list):
|
|
||||||
# This function takes in a list of dictionaries and checks if each dictionary is valid
|
|
||||||
# The order of the dictionaries in the list must match the order of the possible answers
|
|
||||||
|
|
||||||
result = {"valid": False, "error": [], "error_type": "list_dict_checker:unclear"}
|
|
||||||
|
|
||||||
for answer_index in range(len(possible_answers)):
|
|
||||||
flag = True # True means so far, all dictionaries are valid
|
|
||||||
|
|
||||||
# Only proceed if the number of dictionaries in the list matches the number of dictionaries in the possible answers
|
|
||||||
if len(model_output) != len(possible_answers[answer_index]):
|
|
||||||
result["valid"] = False
|
|
||||||
result["error"] = ["Wrong number of dictionaries in the list."]
|
|
||||||
result["error_type"] = "value_error:list_dict_count"
|
|
||||||
flag = False
|
|
||||||
continue
|
|
||||||
|
|
||||||
for dict_index in range(len(model_output)):
|
|
||||||
result = dict_checker(
|
|
||||||
param,
|
|
||||||
model_output[dict_index],
|
|
||||||
[possible_answers[answer_index][dict_index]],
|
|
||||||
)
|
|
||||||
if not result["valid"]:
|
|
||||||
flag = False
|
|
||||||
break
|
|
||||||
if flag:
|
|
||||||
return {"valid": True, "error": []}
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def simple_function_checker(
|
|
||||||
func_description: dict,
|
|
||||||
model_output: dict,
|
|
||||||
possible_answer: dict,
|
|
||||||
language: str,
|
|
||||||
model_name: str,
|
|
||||||
):
|
|
||||||
possible_answer = list(possible_answer.values())[0]
|
|
||||||
# Extract function name and parameters details
|
|
||||||
func_name = func_description["name"]
|
|
||||||
param_details = func_description["parameters"]["properties"]
|
|
||||||
required_params = func_description["parameters"]["required"]
|
|
||||||
|
|
||||||
# Initialize a result dictionary
|
|
||||||
result = {
|
|
||||||
"valid": True,
|
|
||||||
"error": [],
|
|
||||||
"error_type": "simple_function_checker:unclear",
|
|
||||||
}
|
|
||||||
|
|
||||||
# Check if function name matches
|
|
||||||
if func_name not in model_output:
|
|
||||||
result["valid"] = False
|
|
||||||
result["error"].append( # type: ignore[attr-defined]
|
|
||||||
f"Function name {repr(func_name)} not found in model output."
|
|
||||||
)
|
|
||||||
result["error_type"] = "simple_function_checker:wrong_func_name"
|
|
||||||
return result
|
|
||||||
|
|
||||||
model_params = model_output[func_name]
|
|
||||||
|
|
||||||
# Check for required parameters in model output
|
|
||||||
for param in required_params:
|
|
||||||
if param not in model_params:
|
|
||||||
result["valid"] = False
|
|
||||||
result["error"].append(f"Missing required parameter: {repr(param)}.") # type: ignore[attr-defined]
|
|
||||||
result["error_type"] = "simple_function_checker:missing_required"
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Validate types and values for each parameter in model output
|
|
||||||
for param, value in model_params.items():
|
|
||||||
if param not in param_details or param not in possible_answer:
|
|
||||||
result["valid"] = False
|
|
||||||
result["error"].append(f"Unexpected parameter: {repr(param)}.") # type: ignore[attr-defined]
|
|
||||||
result["error_type"] = "simple_function_checker:unexpected_param"
|
|
||||||
return result
|
|
||||||
|
|
||||||
full_param_details = param_details[param]
|
|
||||||
expected_type_description = full_param_details["type"] # This is a string
|
|
||||||
is_variable = False
|
|
||||||
nested_type_converted = None
|
|
||||||
|
|
||||||
if language == "Java":
|
|
||||||
from evals.utils.bfcl.java_type_converter import java_type_converter
|
|
||||||
|
|
||||||
expected_type_converted = JAVA_TYPE_CONVERSION[expected_type_description]
|
|
||||||
|
|
||||||
if expected_type_description in JAVA_TYPE_CONVERSION:
|
|
||||||
if type(value) != str:
|
|
||||||
result["valid"] = False
|
|
||||||
result["error"].append( # type: ignore[attr-defined]
|
|
||||||
f"Incorrect type for parameter {repr(param)}. Expected type String, got {type(value).__name__}. Parameter value: {repr(value)}."
|
|
||||||
)
|
|
||||||
result["error_type"] = "type_error:java"
|
|
||||||
return result
|
|
||||||
|
|
||||||
if expected_type_description in NESTED_CONVERSION_TYPE_LIST:
|
|
||||||
nested_type = param_details[param]["items"]["type"]
|
|
||||||
nested_type_converted = JAVA_TYPE_CONVERSION[nested_type]
|
|
||||||
value = java_type_converter(value, expected_type_description, nested_type)
|
|
||||||
else:
|
|
||||||
value = java_type_converter(value, expected_type_description)
|
|
||||||
|
|
||||||
elif language == "JavaScript":
|
|
||||||
from evals.utils.bfcl.js_type_converter import js_type_converter
|
|
||||||
|
|
||||||
expected_type_converted = JS_TYPE_CONVERSION[expected_type_description]
|
|
||||||
|
|
||||||
if expected_type_description in JS_TYPE_CONVERSION:
|
|
||||||
if type(value) != str:
|
|
||||||
result["valid"] = False
|
|
||||||
result["error"].append( # type: ignore[attr-defined]
|
|
||||||
f"Incorrect type for parameter {repr(param)}. Expected type String, got {type(value).__name__}. Parameter value: {repr(value)}."
|
|
||||||
)
|
|
||||||
result["error_type"] = "type_error:js"
|
|
||||||
return result
|
|
||||||
|
|
||||||
if expected_type_description in NESTED_CONVERSION_TYPE_LIST:
|
|
||||||
nested_type = param_details[param]["items"]["type"]
|
|
||||||
nested_type_converted = JS_TYPE_CONVERSION[nested_type]
|
|
||||||
value = js_type_converter(value, expected_type_description, nested_type)
|
|
||||||
else:
|
|
||||||
value = js_type_converter(value, expected_type_description)
|
|
||||||
|
|
||||||
elif language == "Python":
|
|
||||||
expected_type_converted = PYTHON_TYPE_MAPPING[expected_type_description]
|
|
||||||
if expected_type_description in PYTHON_NESTED_TYPE_CHECK_LIST:
|
|
||||||
nested_type = param_details[param]["items"]["type"]
|
|
||||||
nested_type_converted = PYTHON_TYPE_MAPPING[nested_type]
|
|
||||||
|
|
||||||
# We convert all tuple value to list when the expected type is tuple.
|
|
||||||
# The conversion is necessary because any tuple in the possible answer would become a list after being processed through json.dump() and json.load().
|
|
||||||
# This does introduce some false positive (eg, when the model provides a list value instead of tuple). We hope to find a better solution in the future.
|
|
||||||
if expected_type_description == "tuple" and type(value) == tuple:
|
|
||||||
value = list(value)
|
|
||||||
|
|
||||||
# Allow python auto conversion from int to float
|
|
||||||
if language == "Python" and expected_type_description == "float" and type(value) == int:
|
|
||||||
value = float(value)
|
|
||||||
|
|
||||||
# Type checking
|
|
||||||
# In fact, we only check for Python here.
|
|
||||||
# Type check for other languages are handled by the type converter, and so their value (after conversion) is always correct.
|
|
||||||
type_check_result = type_checker(
|
|
||||||
param,
|
|
||||||
value,
|
|
||||||
possible_answer[param],
|
|
||||||
expected_type_description,
|
|
||||||
expected_type_converted,
|
|
||||||
nested_type_converted,
|
|
||||||
)
|
|
||||||
is_variable = type_check_result["is_variable"]
|
|
||||||
if not type_check_result["valid"]:
|
|
||||||
return type_check_result
|
|
||||||
|
|
||||||
# It doesn't make sense to special handle dictionaries and list of dictionaries if the value is a variable.
|
|
||||||
# We can just treat the variable as a string and use the normal flow.
|
|
||||||
if not is_variable:
|
|
||||||
# Special handle for dictionaries
|
|
||||||
if expected_type_converted == dict:
|
|
||||||
result = dict_checker(param, value, possible_answer[param])
|
|
||||||
if not result["valid"]:
|
|
||||||
return result
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Special handle for list of dictionaries
|
|
||||||
elif expected_type_converted == list and nested_type_converted == dict:
|
|
||||||
result = list_dict_checker(param, value, possible_answer[param])
|
|
||||||
if not result["valid"]:
|
|
||||||
return result
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Special handle for strings
|
|
||||||
elif expected_type_converted == str:
|
|
||||||
# We don't check for case sensitivity for string, as long as it's not a variable
|
|
||||||
result = string_checker(param, value, possible_answer[param])
|
|
||||||
if not result["valid"]:
|
|
||||||
return result
|
|
||||||
continue
|
|
||||||
|
|
||||||
elif expected_type_converted == list:
|
|
||||||
result = list_checker(param, value, possible_answer[param])
|
|
||||||
if not result["valid"]:
|
|
||||||
return result
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check if the value is within the possible answers
|
|
||||||
if value not in possible_answer[param]:
|
|
||||||
result["valid"] = False
|
|
||||||
result["error"].append( # type: ignore[attr-defined]
|
|
||||||
f"Invalid value for parameter {repr(param)}: {repr(value)}. Expected one of {possible_answer[param]}."
|
|
||||||
)
|
|
||||||
result["error_type"] = "value_error:others"
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Check for optional parameters not provided but allowed
|
|
||||||
for param in possible_answer:
|
|
||||||
if param not in model_params and "" not in possible_answer[param]:
|
|
||||||
result["valid"] = False
|
|
||||||
result["error"].append( # type: ignore[attr-defined]
|
|
||||||
f"Optional parameter {repr(param)} not provided and not marked as optional."
|
|
||||||
)
|
|
||||||
result["error_type"] = "simple_function_checker:missing_optional"
|
|
||||||
return result
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def parallel_function_checker_enforce_order(
|
|
||||||
func_descriptions: list,
|
|
||||||
model_output: list,
|
|
||||||
possible_answers: dict,
|
|
||||||
language: str,
|
|
||||||
model_name: str,
|
|
||||||
):
|
|
||||||
if len(model_output) != len(possible_answers):
|
|
||||||
return {
|
|
||||||
"valid": False,
|
|
||||||
"error": ["Wrong number of functions."],
|
|
||||||
"error_type": "parallel_function_checker_enforce_order:wrong_count",
|
|
||||||
}
|
|
||||||
|
|
||||||
func_name_list = list(possible_answers.keys())
|
|
||||||
possible_answers_list = []
|
|
||||||
|
|
||||||
for key, value in possible_answers.items():
|
|
||||||
possible_answers_list.append({key: value})
|
|
||||||
|
|
||||||
for i in range(len(possible_answers_list)):
|
|
||||||
func_description = find_description(func_descriptions, func_name_list[i])
|
|
||||||
|
|
||||||
result = simple_function_checker(
|
|
||||||
func_description,
|
|
||||||
model_output[i],
|
|
||||||
possible_answers_list[i],
|
|
||||||
language,
|
|
||||||
model_name,
|
|
||||||
)
|
|
||||||
if not result["valid"]:
|
|
||||||
return result
|
|
||||||
|
|
||||||
return {"valid": True, "error": []}
|
|
||||||
|
|
||||||
|
|
||||||
def parallel_function_checker_no_order(
    func_descriptions: list,
    model_output: list,
    possible_answers: list,
    language: str,
    model_name: str,
):
    if len(model_output) != len(possible_answers):
        return {
            "valid": False,
            "error": ["Wrong number of functions."],
            "error_type": "parallel_function_checker_no_order:wrong_count",
        }

    matched_indices = []

    # We go through the possible answers one by one, and eliminate the model output that matches the possible answer
    # It must be this way because we need ground truth to fetch the correct function description
    for i in range(len(possible_answers)):
        # possible_answers[i] is a dictionary with only one key
        func_name_expected = list(possible_answers[i].keys())[0]
        func_description = find_description(func_descriptions, func_name_expected)

        all_errors = []

        for index in range(len(model_output)):
            if index in matched_indices:
                continue

            result = simple_function_checker(
                func_description,
                model_output[index],
                possible_answers[i],
                language,
                model_name,
            )

            if result["valid"]:
                matched_indices.append(index)
                break
            else:
                all_errors.append(
                    {
                        f"Model Result Index {index}": {
                            "sub_error": result["error"],
                            "sub_error_type": result["error_type"],
                            "model_output_item": model_output[index],
                            "possible_answer_item": possible_answers[i],
                        }
                    }
                )

        if not result["valid"]:
            considered_indices = [i for i in range(len(model_output)) if i not in matched_indices]
            all_errors.insert(
                0,
                f"Could not find a matching function among index {considered_indices} of model output for index {i} of possible answers.",  # type: ignore[arg-type]
            )
            return {
                "valid": False,
                "error": all_errors,
                "error_type": "parallel_function_checker_no_order:cannot_find_match",
            }

    return {"valid": True, "error": []}


def multiple_function_checker(
    func_descriptions: list,
    model_output: list,
    possible_answers: list,
    language: str,
    model_name: str,
):
    if len(model_output) != len(possible_answers):
        return {
            "valid": False,
            "error": ["Wrong number of functions."],
            "error_type": "multiple_function_checker:wrong_count",
        }

    # possible_answers is a list of only one dictionary with only one key
    func_name_expected = list(possible_answers[0].keys())[0]
    func_description = find_description(func_descriptions, func_name_expected)
    return simple_function_checker(
        func_description,
        model_output[0],
        possible_answers[0],
        language,
        model_name,
    )


def patten_matcher(exec_output, expected_result, function_call, is_sanity_check):
    result = {"valid": True, "error": [], "error_type": "executable_checker:unclear"}

    if type(exec_output) != type(expected_result):
        return {
            "valid": False,
            "error": [
                f"Wrong execution result type for {repr(function_call)}. Expected type: {type(expected_result)}, but got: {type(exec_output)}."
            ],
            "error_type": "executable_checker:wrong_result_type",
            "model_executed_output": exec_output,
        }
    if type(exec_output) == dict:
        # We loosen the requirement for the sanity check as the expected result used in the sanity check might not be the most up-to-date one.
        # This happens when the key is a timestamp or a random number.
        if is_sanity_check:
            if len(exec_output) != len(expected_result):
                return {
                    "valid": False,
                    "error": [
                        f"Wrong execution result pattern for {repr(function_call)}. Expect type Dict, but wrong number of elements in the output. Expected length: {len(expected_result)}, but got: {len(exec_output)}."
                    ],
                    "error_type": "executable_checker:wrong_result_type:dict_length",
                    "model_executed_output": exec_output,
                }
            else:
                return result

        for key, value in expected_result.items():
            if key not in exec_output:
                return {
                    "valid": False,
                    "error": [
                        f"Wrong execution result pattern for {repr(function_call)}. Expect type Dict, but key {repr(key)} not found in the model output."
                    ],
                    "error_type": "executable_checker:wrong_result_type:dict_key_not_found",
                    "model_executed_output": exec_output,
                }
        for key, value in exec_output.items():
            if key not in expected_result:
                return {
                    "valid": False,
                    "error": [
                        f"Wrong execution result pattern for {repr(function_call)}. Expect type Dict, but key {repr(key)} not expected in the model output."
                    ],
                    "error_type": "executable_checker:wrong_result_type:dict_extra_key",
                    "model_executed_output": exec_output,
                }
    if type(exec_output) == list:
        if len(exec_output) != len(expected_result):
            return {
                "valid": False,
                "error": [
                    f"Wrong execution result pattern for {repr(function_call)}. Expect type list, but wrong number of elements in the output. Expected length: {len(expected_result)}, but got: {len(exec_output)}."
                ],
                "error_type": "executable_checker:wrong_result_type:list_length",
                "model_executed_output": exec_output,
            }
    return result


#### Helper functions for Exec ####
def executable_checker_simple(
    function_call: str,
    expected_result,
    expected_result_type: str,
    is_sanity_check=False,
):
    result = {"valid": True, "error": [], "error_type": "executable_checker:unclear"}

    exec_dict: Any = {}

    try:
        exec(
            "from executable_python_function import *" + "\nresult=" + function_call,
            exec_dict,
        )
        exec_output = exec_dict["result"]
    except NoAPIKeyError as e:
        raise e
    except Exception as e:
        result["valid"] = False
        result["error"].append(  # type: ignore[attr-defined]
            f"Error in execution: {repr(function_call)}. Error: {str(e)}"
        )
        result["error_type"] = "executable_checker:execution_error"
        return result

    # We need to special handle the case where the execution result is a tuple and convert it to a list
    # Because when json is stored, the tuple is converted to a list, and so the expected result is a list when loaded from json
    if isinstance(exec_output, tuple):
        exec_output = list(exec_output)

    if expected_result_type == "exact_match":
        if exec_output != expected_result:
            result["valid"] = False
            result["error"].append(  # type: ignore[attr-defined]
                f"Wrong execution result for {repr(function_call)}. Expected: {expected_result}, but got: {exec_output}."
            )
            result["error_type"] = "executable_checker:wrong_result"
            result["model_executed_output"] = exec_output
            return result

    elif expected_result_type == "real_time_match":
        # Allow for 5% difference
        if (type(expected_result) == float or type(expected_result) == int) and (
            type(exec_output) == float or type(exec_output) == int
        ):
            if not (
                expected_result * (1 - REAL_TIME_MATCH_ALLOWED_DIFFERENCE)
                <= exec_output
                <= expected_result * (1 + REAL_TIME_MATCH_ALLOWED_DIFFERENCE)
            ):
                result["valid"] = False
                result["error"].append(  # type: ignore[attr-defined]
                    f"Wrong execution result for {repr(function_call)}. Expected: {expected_result}, but got: {exec_output}. {REAL_TIME_MATCH_ALLOWED_DIFFERENCE * 100}% difference allowed."
                )
                result["error_type"] = "executable_checker:wrong_result_real_time"
                result["model_executed_output"] = exec_output
                return result
        else:
            result["valid"] = False
            result["error"].append(  # type: ignore[attr-defined]
                f"Wrong execution result for {repr(function_call)}. Expected: {expected_result}, but got: {exec_output}. Type needs to be float or int for real time match criteria."
            )
            result["error_type"] = "executable_checker:wrong_result_real_time"
            result["model_executed_output"] = exec_output
            return result

    else:
        # structural match
        pattern_match_result = patten_matcher(exec_output, expected_result, function_call, is_sanity_check)
        if not pattern_match_result["valid"]:
            return pattern_match_result

    return result


def executable_checker_parallel_no_order(
    decoded_result: list, expected_exec_result: list, expected_exec_result_type: list
):
    if len(decoded_result) != len(expected_exec_result):
        return {
            "valid": False,
            "error": [
                f"Wrong number of functions provided. Expected {len(expected_exec_result)}, but got {len(decoded_result)}."
            ],
            "error_type": "value_error:exec_result_count",
        }

    matched_indices = []
    for i in range(len(expected_exec_result)):
        all_errors = []
        for index in range(len(decoded_result)):
            if index in matched_indices:
                continue

            result = executable_checker_simple(
                decoded_result[index],
                expected_exec_result[i],
                expected_exec_result_type[i],
                False,
            )

            if result["valid"]:
                matched_indices.append(index)
                break
            else:
                all_errors.append(
                    {
                        f"Model Result Index {index}": {
                            "sub_error": result["error"],
                            "sub_error_type": result["error_type"],
                            "model_executed_output": (
                                result["model_executed_output"] if "model_executed_output" in result else None
                            ),
                        }
                    }
                )

        if not result["valid"]:
            considered_indices = [i for i in range(len(decoded_result)) if i not in matched_indices]
            all_errors.insert(
                0,
                f"Could not find a matching function among index {considered_indices} of model output for index {i} of possible answers.",  # type: ignore[arg-type]
            )
            return {
                "valid": False,
                "error": all_errors,
                "error_type": "executable_checker:cannot_find_match",
            }

    return {"valid": True, "error": [], "error_type": "executable_checker:unclear"}


#### Main function ####
def executable_checker_rest(func_call, idx):
    # Move this here for now to avoid needing to read this file / fix paths to be relative to dataset_dir. Fix when it's actually needed / used.
    EVAL_GROUND_TRUTH_PATH = "/mnt/wsfuse/fair_llm_v2/datasets/eval/bfcl/rest-eval-response_v5.jsonl"  # Ground truth file for v5 for rest execution
    with open(EVAL_GROUND_TRUTH_PATH, "r") as f:
        EVAL_GROUND_TRUTH = f.readlines()
    if "https://geocode.maps.co" in func_call:
        time.sleep(2)
    if "requests_get" in func_call:
        func_call = func_call.replace("requests_get", "requests.get")
    try:
        response = eval(func_call)
    except Exception as e:
        return {
            "valid": False,
            "error": [f"Execution failed. {str(e)}"],
            "error_type": "executable_checker_rest:execution_error",
        }

    try:
        if response.status_code == 200:
            eval_GT_json = json.loads(EVAL_GROUND_TRUTH[idx])
            try:
                if isinstance(eval_GT_json, dict):
                    if isinstance(response.json(), dict):
                        if set(eval_GT_json.keys()) == set(response.json().keys()):
                            return {"valid": True, "error": [], "error_type": ""}
                        return {
                            "valid": False,
                            "error": ["Key inconsistency"],
                            "error_type": "executable_checker_rest:wrong_key",
                        }
                    return {
                        "valid": False,
                        "error": [f"Expected dictionary, but got {type(response.json())}"],
                        "error_type": "executable_checker_rest:wrong_type",
                    }

                elif isinstance(eval_GT_json, list):
                    if isinstance(response.json(), list):
                        if len(eval_GT_json) != len(response.json()):
                            return {
                                "valid": False,
                                "error": ["Response list length inconsistency."],
                                "error_type": "value_error:exec_result_rest_count",
                            }

                        else:
                            for i in range(len(eval_GT_json)):
                                if set(eval_GT_json[i].keys()) != set(response.json()[i].keys()):
                                    return {
                                        "valid": False,
                                        "error": ["Key inconsistency"],
                                        "error_type": "executable_checker_rest:wrong_key",
                                    }

                            return {"valid": True, "error": []}
                    else:
                        return {
                            "valid": False,
                            "error": [f"Expected list, but got {type(response.json())}"],
                            "error_type": "executable_checker_rest:wrong_type",
                        }
                return {
                    "valid": False,
                    "error": [f"Expected dict or list, but got {type(response.json())}"],
                    "error_type": "executable_checker_rest:wrong_type",
                }
            except Exception as e:
                return {
                    "valid": False,
                    "error": [
                        f"Error in execution and type checking. Status code: {response.status_code}. Error: {str(e)}"
                    ],
                    "error_type": "executable_checker_rest:response_format_error",
                }
        else:
            return {
                "valid": False,
                "error": [f"Execution result status code is not 200, got {response.status_code}"],
                "error_type": "executable_checker_rest:wrong_status_code",
            }
    except Exception as e:
        return {
            "valid": False,
            "error": [f"Cannot get status code of the response. Error: {str(e)}"],
            "error_type": "executable_checker_rest:cannot_get_status_code",
        }


def ast_checker(func_description, model_output, possible_answer, language, test_category, model_name):
    if "parallel" in test_category:
        return parallel_function_checker_no_order(func_description, model_output, possible_answer, language, model_name)

    elif "multiple" in test_category:
        return multiple_function_checker(func_description, model_output, possible_answer, language, model_name)

    else:
        if len(model_output) != 1:
            return {
                "valid": False,
                "error": ["Wrong number of functions."],
                "error_type": "simple_function_checker:wrong_count",
            }

        return simple_function_checker(
            func_description[0],
            model_output[0],
            possible_answer[0],
            language,
            model_name,
        )


def exec_checker(decoded_result: list, func_description: dict, test_category: str):
    if "multiple" in test_category or "parallel" in test_category:
        return executable_checker_parallel_no_order(
            decoded_result,
            func_description["execution_result"],
            func_description["execution_result_type"],
        )

    else:
        if len(decoded_result) != 1:
            return {
                "valid": False,
                "error": ["Wrong number of functions."],
                "error_type": "simple_exec_checker:wrong_count",
            }
        return executable_checker_simple(
            decoded_result[0],
            func_description["execution_result"][0],
            func_description["execution_result_type"][0],
            False,
        )


def is_empty_output(decoded_output):
    # This function is a patch to the ast decoder for relevance detection
    # Sometimes the ast decoder will parse successfully, but the input doesn't really have a function call
    # [], [{}], and anything that is not in function calling format is considered empty (and thus should be marked as correct)
    if not is_function_calling_format_output(decoded_output):
        return True
    if len(decoded_output) == 0:
        return True
    if len(decoded_output) == 1 and len(decoded_output[0]) == 0:
        return True


def is_function_calling_format_output(decoded_output):
    # Ensure the output is a list of dictionaries
    if type(decoded_output) == list:
        for item in decoded_output:
            if type(item) != dict:
                return False
        return True
    return False
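
For orientation, here is a minimal sketch of how the checkers above might be driven. The function description, model output, and possible-answer shapes are illustrative assumptions, not taken from any dataset:

```python
# Hypothetical inputs, shaped after what the checkers above expect.
func_descriptions = [
    {
        "name": "get_weather",
        "parameters": {
            "type": "dict",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    }
]
model_output = [{"get_weather": {"city": "Paris"}}]       # one decoded call
possible_answer = [{"get_weather": {"city": ["Paris"]}}]  # allowed values per param

verdict = ast_checker(
    func_descriptions,
    model_output,
    possible_answer,
    language="Python",
    test_category="simple",
    model_name="demo-model",
)
print(verdict["valid"], verdict.get("error"))
```
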
@ -1,40 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""
Tree-sitter changes its API with unfortunate frequency. Modules that need it should
import it from here so that we can centrally manage things as necessary.
"""

# These currently work with tree-sitter 0.23.0
# NOTE: Don't import tree-sitter or any of the language modules in the main module
# because not all environments have them. Import lazily inside functions where needed.

import importlib
import typing

if typing.TYPE_CHECKING:
    import tree_sitter


def get_language(language: str) -> "tree_sitter.Language":
    import tree_sitter

    language_module_name = f"tree_sitter_{language}"
    try:
        language_module = importlib.import_module(language_module_name)
    except ModuleNotFoundError as exc:
        raise ValueError(
            f"Language {language} is not found. Please install the tree-sitter-{language} package."
        ) from exc
    return tree_sitter.Language(language_module.language())


def get_parser(language: str, **kwargs) -> "tree_sitter.Parser":
    import tree_sitter

    lang = get_language(language)
    return tree_sitter.Parser(lang, **kwargs)
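
For reference, a minimal usage sketch of the deleted helpers (assuming the tree-sitter and tree-sitter-python packages are installed):

```python
# Parse a small Python snippet through the central helpers above.
parser = get_parser("python")
tree = parser.parse(b"def f(x):\n    return x + 1\n")
print(tree.root_node.type)  # -> "module"
```
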
@ -14,6 +14,6 @@ from .config import RagToolRuntimeConfig
 async def get_provider_impl(config: RagToolRuntimeConfig, deps: dict[Api, Any]):
     from .memory import MemoryToolRuntimeImpl

-    impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference])
+    impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference], deps[Api.files])
     await impl.initialize()
     return impl
@ -5,10 +5,15 @@
 # the root directory of this source tree.

 import asyncio
+import base64
+import io
+import mimetypes
 import secrets
 import string
 from typing import Any

+import httpx
+from fastapi import UploadFile
 from pydantic import TypeAdapter

 from llama_stack.apis.common.content_types import (
@ -17,6 +22,7 @@ from llama_stack.apis.common.content_types import (
     InterleavedContentItem,
     TextContentItem,
 )
+from llama_stack.apis.files import Files, OpenAIFilePurpose
 from llama_stack.apis.inference import Inference
 from llama_stack.apis.tools import (
     ListToolDefsResponse,
@ -30,13 +36,18 @@ from llama_stack.apis.tools import (
     ToolParameter,
     ToolRuntime,
 )
-from llama_stack.apis.vector_io import QueryChunksResponse, VectorIO
+from llama_stack.apis.vector_io import (
+    QueryChunksResponse,
+    VectorIO,
+    VectorStoreChunkingStrategyStatic,
+    VectorStoreChunkingStrategyStaticConfig,
+)
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack.providers.utils.memory.vector_store import (
     content_from_doc,
-    make_overlapped_chunks,
+    parse_data_url,
 )

 from .config import RagToolRuntimeConfig
@ -55,10 +66,12 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
         config: RagToolRuntimeConfig,
         vector_io_api: VectorIO,
         inference_api: Inference,
+        files_api: Files,
     ):
         self.config = config
         self.vector_io_api = vector_io_api
         self.inference_api = inference_api
+        self.files_api = files_api

     async def initialize(self):
         pass
@ -78,27 +91,50 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
         vector_db_id: str,
         chunk_size_in_tokens: int = 512,
     ) -> None:
-        chunks = []
+        if not documents:
+            return
+
         for doc in documents:
-            content = await content_from_doc(doc)
-            # TODO: we should add enrichment here as URLs won't be added to the metadata by default
-            chunks.extend(
-                make_overlapped_chunks(
-                    doc.document_id,
-                    content,
-                    chunk_size_in_tokens,
-                    chunk_size_in_tokens // 4,
-                    doc.metadata,
+            if isinstance(doc.content, URL):
+                if doc.content.uri.startswith("data:"):
+                    parts = parse_data_url(doc.content.uri)
+                    file_data = base64.b64decode(parts["data"]) if parts["is_base64"] else parts["data"].encode()
+                    mime_type = parts["mimetype"]
+                else:
+                    async with httpx.AsyncClient() as client:
+                        response = await client.get(doc.content.uri)
+                    file_data = response.content
+                    mime_type = doc.mime_type or response.headers.get("content-type", "application/octet-stream")
+            else:
+                content_str = await content_from_doc(doc)
+                file_data = content_str.encode("utf-8")
+                mime_type = doc.mime_type or "text/plain"
+
+            file_extension = mimetypes.guess_extension(mime_type) or ".txt"
+            filename = doc.metadata.get("filename", f"{doc.document_id}{file_extension}")
+
+            file_obj = io.BytesIO(file_data)
+            file_obj.name = filename
+
+            upload_file = UploadFile(file=file_obj, filename=filename)
+
+            created_file = await self.files_api.openai_upload_file(
+                file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS
+            )
+
+            chunking_strategy = VectorStoreChunkingStrategyStatic(
+                static=VectorStoreChunkingStrategyStaticConfig(
+                    max_chunk_size_tokens=chunk_size_in_tokens,
+                    chunk_overlap_tokens=chunk_size_in_tokens // 4,
                 )
             )

-        if not chunks:
-            return
-
-        await self.vector_io_api.insert_chunks(
-            chunks=chunks,
-            vector_db_id=vector_db_id,
-        )
+            await self.vector_io_api.openai_attach_file_to_vector_store(
+                vector_store_id=vector_db_id,
+                file_id=created_file.id,
+                attributes=doc.metadata,
+                chunking_strategy=chunking_strategy,
+            )

     async def query(
         self,
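
The `data:` URI branch of the new ingestion path is easy to exercise in isolation. A rough stand-in for `parse_data_url` using only the standard library (the helper below is a sketch, not the actual implementation):

```python
import base64
import mimetypes


def parse_data_url_sketch(uri: str) -> tuple[bytes, str]:
    # "data:<mimetype>[;base64],<payload>" -> (raw bytes, mime type)
    header, _, payload = uri.partition(",")
    meta = header[len("data:"):]
    is_base64 = meta.endswith(";base64")
    mime_type = meta.removesuffix(";base64") or "text/plain"
    data = base64.b64decode(payload) if is_base64 else payload.encode()
    return data, mime_type


file_data, mime_type = parse_data_url_sketch("data:text/plain;base64,aGVsbG8=")
print(file_data, mimetypes.guess_extension(mime_type))  # b'hello' .txt
```
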
@ -30,11 +30,11 @@ from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import (
     RERANKER_TYPE_RRF,
-    RERANKER_TYPE_WEIGHTED,
     ChunkForDeletion,
     EmbeddingIndex,
     VectorDBWithIndex,
 )
+from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator

 logger = get_logger(name=__name__, category="vector_io")

@ -66,59 +66,6 @@ def _create_sqlite_connection(db_path):
     return connection


-def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
-    """Normalize scores to [0,1] range using min-max normalization."""
-    if not scores:
-        return {}
-    min_score = min(scores.values())
-    max_score = max(scores.values())
-    score_range = max_score - min_score
-    if score_range > 0:
-        return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()}
-    return dict.fromkeys(scores, 1.0)
-
-
-def _weighted_rerank(
-    vector_scores: dict[str, float],
-    keyword_scores: dict[str, float],
-    alpha: float = 0.5,
-) -> dict[str, float]:
-    """ReRanker that uses weighted average of scores."""
-    all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
-    normalized_vector_scores = _normalize_scores(vector_scores)
-    normalized_keyword_scores = _normalize_scores(keyword_scores)
-
-    return {
-        doc_id: (alpha * normalized_keyword_scores.get(doc_id, 0.0))
-        + ((1 - alpha) * normalized_vector_scores.get(doc_id, 0.0))
-        for doc_id in all_ids
-    }
-
-
-def _rrf_rerank(
-    vector_scores: dict[str, float],
-    keyword_scores: dict[str, float],
-    impact_factor: float = 60.0,
-) -> dict[str, float]:
-    """ReRanker that uses Reciprocal Rank Fusion."""
-    # Convert scores to ranks
-    vector_ranks = {
-        doc_id: i + 1 for i, (doc_id, _) in enumerate(sorted(vector_scores.items(), key=lambda x: x[1], reverse=True))
-    }
-    keyword_ranks = {
-        doc_id: i + 1 for i, (doc_id, _) in enumerate(sorted(keyword_scores.items(), key=lambda x: x[1], reverse=True))
-    }
-
-    all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
-    rrf_scores = {}
-    for doc_id in all_ids:
-        vector_rank = vector_ranks.get(doc_id, float("inf"))
-        keyword_rank = keyword_ranks.get(doc_id, float("inf"))
-        # RRF formula: score = 1/(k + r) where k is impact_factor and r is the rank
-        rrf_scores[doc_id] = (1.0 / (impact_factor + vector_rank)) + (1.0 / (impact_factor + keyword_rank))
-    return rrf_scores
-
-
 def _make_sql_identifier(name: str) -> str:
     return re.sub(r"[^a-zA-Z0-9_]", "_", name)

@ -398,14 +345,10 @@ class SQLiteVecIndex(EmbeddingIndex):
             for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
         }

-        # Combine scores using the specified reranker
-        if reranker_type == RERANKER_TYPE_WEIGHTED:
-            alpha = reranker_params.get("alpha", 0.5)
-            combined_scores = _weighted_rerank(vector_scores, keyword_scores, alpha)
-        else:
-            # Default to RRF for None, RRF, or any unknown types
-            impact_factor = reranker_params.get("impact_factor", 60.0)
-            combined_scores = _rrf_rerank(vector_scores, keyword_scores, impact_factor)
+        # Combine scores using the reranking utility
+        combined_scores = WeightedInMemoryAggregator.combine_search_results(
+            vector_scores, keyword_scores, reranker_type, reranker_params
+        )

         # Sort by combined score and get top k results
         sorted_items = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)
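
The deleted helpers make the reranking math easy to reproduce. A self-contained sketch of Reciprocal Rank Fusion over two score maps (the document IDs and scores are made up for illustration):

```python
def rrf_combine(vector_scores: dict, keyword_scores: dict, impact_factor: float = 60.0) -> dict:
    """Combine two score maps with RRF: score = sum over sources of 1 / (k + rank)."""
    def ranks(scores: dict) -> dict:
        ordered = sorted(scores, key=scores.get, reverse=True)
        return {doc_id: i + 1 for i, doc_id in enumerate(ordered)}

    v_ranks, k_ranks = ranks(vector_scores), ranks(keyword_scores)
    combined = {}
    for doc_id in set(vector_scores) | set(keyword_scores):
        v = v_ranks.get(doc_id, float("inf"))  # missing from a source -> contributes ~0
        k = k_ranks.get(doc_id, float("inf"))
        combined[doc_id] = 1.0 / (impact_factor + v) + 1.0 / (impact_factor + k)
    return combined


scores = rrf_combine({"a": 0.9, "b": 0.4}, {"b": 7.0, "c": 3.0})
print(sorted(scores, key=scores.get, reverse=True))  # ['b', 'a', 'c'] -- 'b' appears in both
```

The `WeightedInMemoryAggregator.combine_search_results` call that replaces these helpers presumably centralizes the same logic so every vector-io backend shares one implementation.
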
@ -116,7 +116,7 @@ def available_providers() -> list[ProviderSpec]:
         adapter=AdapterSpec(
             adapter_type="fireworks",
             pip_packages=[
-                "fireworks-ai",
+                "fireworks-ai<=0.17.16",
             ],
             module="llama_stack.providers.remote.inference.fireworks",
             config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
@ -207,7 +207,7 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="gemini",
-            pip_packages=["litellm"],
+            pip_packages=["litellm", "openai"],
             module="llama_stack.providers.remote.inference.gemini",
             config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig",
             provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator",
@ -248,7 +248,7 @@ Available Models:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="groq",
-            pip_packages=["litellm"],
+            pip_packages=["litellm", "openai"],
             module="llama_stack.providers.remote.inference.groq",
             config_class="llama_stack.providers.remote.inference.groq.GroqConfig",
             provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
@ -270,7 +270,7 @@ Available Models:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="sambanova",
-            pip_packages=["litellm"],
+            pip_packages=["litellm", "openai"],
             module="llama_stack.providers.remote.inference.sambanova",
             config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
             provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator",
@ -292,7 +292,7 @@ Available Models:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="watsonx",
-            pip_packages=["ibm_watson_machine_learning"],
+            pip_packages=["ibm_watsonx_ai"],
             module="llama_stack.providers.remote.inference.watsonx",
             config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
             provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",

@ -32,7 +32,7 @@ def available_providers() -> list[ProviderSpec]:
         ],
         module="llama_stack.providers.inline.tool_runtime.rag",
         config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig",
-        api_dependencies=[Api.vector_io, Api.inference],
+        api_dependencies=[Api.vector_io, Api.inference, Api.files],
         description="RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.",
     ),
     remote_provider_spec(
@ -404,6 +404,60 @@ That means you'll get fast and efficient vector retrieval.
 - Easy to use
 - Fully integrated with Llama Stack

+There are three implementations of search for PGVectorIndex available:
+
+1. Vector Search:
+- How it works:
+  - Uses PostgreSQL's vector extension (pgvector) to perform similarity search
+  - Compares query embeddings against stored embeddings using Cosine distance or other distance metrics
+  - Eg. SQL query: SELECT document, embedding <=> %s::vector AS distance FROM table ORDER BY distance
+
+- Characteristics:
+  - Semantic understanding - finds documents similar in meaning even if they don't share keywords
+  - Works with high-dimensional vector embeddings (typically 768, 1024, or higher dimensions)
+  - Best for: Finding conceptually related content, handling synonyms, cross-language search
+
+2. Keyword Search
+- How it works:
+  - Uses PostgreSQL's full-text search capabilities with tsvector and ts_rank
+  - Converts text to searchable tokens using to_tsvector('english', text). Default language is English.
+  - Eg. SQL query: SELECT document, ts_rank(tokenized_content, plainto_tsquery('english', %s)) AS score
+
+- Characteristics:
+  - Lexical matching - finds exact keyword matches and variations
+  - Uses GIN (Generalized Inverted Index) for fast text search performance
+  - Scoring: Uses PostgreSQL's ts_rank function for relevance scoring
+  - Best for: Exact term matching, proper names, technical terms, Boolean-style queries
+
+3. Hybrid Search
+- How it works:
+  - Combines both vector and keyword search results
+  - Runs both searches independently, then merges results using configurable reranking
+
+- Two reranking strategies available:
+  - Reciprocal Rank Fusion (RRF) - (default: 60.0)
+  - Weighted Average - (default: 0.5)
+
+- Characteristics:
+  - Best of both worlds: semantic understanding + exact matching
+  - Documents appearing in both searches get boosted scores
+  - Configurable balance between semantic and lexical matching
+  - Best for: General-purpose search where you want both precision and recall
+
+4. Database Schema
+The PGVector implementation stores data optimized for all three search types:
+CREATE TABLE vector_store_xxx (
+    id TEXT PRIMARY KEY,
+    document JSONB,               -- Original document
+    embedding vector(dimension),  -- For vector search
+    content_text TEXT,            -- Raw text content
+    tokenized_content TSVECTOR    -- For keyword search
+);
+
+-- Indexes for performance
+CREATE INDEX content_gin_idx ON table USING GIN(tokenized_content);  -- Keyword search
+-- Vector index created automatically by pgvector
+
 ## Usage

 To use PGVector in your Llama Stack project, follow these steps:
@ -412,6 +466,25 @@ To use PGVector in your Llama Stack project, follow these steps:
 2. Configure your Llama Stack project to use pgvector. (e.g. remote::pgvector).
 3. Start storing and querying vectors.

+## This is an example of how you can set up your environment for using PGVector
+
+1. Export env vars:
+```bash
+export ENABLE_PGVECTOR=true
+export PGVECTOR_HOST=localhost
+export PGVECTOR_PORT=5432
+export PGVECTOR_DB=llamastack
+export PGVECTOR_USER=llamastack
+export PGVECTOR_PASSWORD=llamastack
+```
+
+2. Create DB:
+```bash
+psql -h localhost -U postgres -c "CREATE ROLE llamastack LOGIN PASSWORD 'llamastack';"
+psql -h localhost -U postgres -c "CREATE DATABASE llamastack OWNER llamastack;"
+psql -h localhost -U llamastack -d llamastack -c "CREATE EXTENSION IF NOT EXISTS vector;"
+```
+
 ## Installation

 You can install PGVector using docker:
@ -449,6 +522,7 @@ Weaviate supports:
 - Metadata filtering
 - Multi-modal retrieval
+
 ## Usage

 To use Weaviate in your Llama Stack project, follow these steps:
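
To make the keyword-search description above concrete, here is a small sketch using psycopg2. The table and column names follow the schema sketch in the docs and are assumptions, not a guaranteed layout:

```python
import psycopg2

conn = psycopg2.connect(
    host="localhost", dbname="llamastack", user="llamastack", password="llamastack"
)
query = "retrieval augmented generation"
with conn, conn.cursor() as cur:
    # Full-text keyword search: tsvector match plus ts_rank scoring, as described above.
    cur.execute(
        """
        SELECT id, ts_rank(tokenized_content, plainto_tsquery('english', %s)) AS score
        FROM vector_store_xxx
        WHERE tokenized_content @@ plainto_tsquery('english', %s)
        ORDER BY score DESC
        LIMIT 5
        """,
        (query, query),
    )
    for doc_id, score in cur.fetchall():
        print(doc_id, score)
```
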
@ -6,15 +6,14 @@
 from typing import Any

-from llama_stack.core.datatypes import Api
+from llama_stack.core.datatypes import AccessRule, Api

 from .config import S3FilesImplConfig


-async def get_adapter_impl(config: S3FilesImplConfig, deps: dict[Api, Any]):
+async def get_adapter_impl(config: S3FilesImplConfig, deps: dict[Api, Any], policy: list[AccessRule] | None = None):
     from .files import S3FilesImpl

-    # TODO: authorization policies and user separation
-    impl = S3FilesImpl(config)
+    impl = S3FilesImpl(config, policy or [])
     await impl.initialize()
     return impl
@ -4,9 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import time
 import uuid
-from typing import Annotated
+from datetime import UTC, datetime
+from typing import Annotated, Any

 import boto3
 from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError
@ -15,14 +15,17 @@ from fastapi import File, Form, Response, UploadFile
 from llama_stack.apis.common.errors import ResourceNotFoundError
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.files import (
+    ExpiresAfter,
     Files,
     ListOpenAIFileResponse,
     OpenAIFileDeleteResponse,
     OpenAIFileObject,
     OpenAIFilePurpose,
 )
+from llama_stack.core.datatypes import AccessRule
 from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.sqlstore import SqlStore, sqlstore_impl
+from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl

 from .config import S3FilesImplConfig

@ -83,22 +86,85 @@ async def _create_bucket_if_not_exists(client: boto3.client, config: S3FilesImpl
         raise RuntimeError(f"Failed to access S3 bucket '{config.bucket_name}': {e}") from e


+def _make_file_object(
+    *,
+    id: str,
+    filename: str,
+    purpose: str,
+    bytes: int,
+    created_at: int,
+    expires_at: int,
+    **kwargs: Any,  # here to ignore any additional fields, e.g. extra fields from AuthorizedSqlStore
+) -> OpenAIFileObject:
+    """
+    Construct an OpenAIFileObject and normalize expires_at.
+
+    If expires_at is greater than the max we treat it as no-expiration and
+    return None for expires_at.
+
+    The OpenAI spec says expires_at type is Integer, but the implementation
+    will return None for no expiration.
+    """
+    obj = OpenAIFileObject(
+        id=id,
+        filename=filename,
+        purpose=OpenAIFilePurpose(purpose),
+        bytes=bytes,
+        created_at=created_at,
+        expires_at=expires_at,
+    )
+
+    if obj.expires_at is not None and obj.expires_at > (obj.created_at + ExpiresAfter.MAX):
+        obj.expires_at = None  # type: ignore
+
+    return obj
+
+
 class S3FilesImpl(Files):
     """S3-based implementation of the Files API."""

-    # TODO: implement expiration, for now a silly offset
-    _SILLY_EXPIRATION_OFFSET = 100 * 365 * 24 * 60 * 60
-
-    def __init__(self, config: S3FilesImplConfig) -> None:
+    def __init__(self, config: S3FilesImplConfig, policy: list[AccessRule]) -> None:
         self._config = config
+        self.policy = policy
         self._client: boto3.client | None = None
-        self._sql_store: SqlStore | None = None
+        self._sql_store: AuthorizedSqlStore | None = None
+
+    def _now(self) -> int:
+        """Return current UTC timestamp as int seconds."""
+        return int(datetime.now(UTC).timestamp())
+
+    async def _get_file(self, file_id: str, return_expired: bool = False) -> dict[str, Any]:
+        where: dict[str, str | dict] = {"id": file_id}
+        if not return_expired:
+            where["expires_at"] = {">": self._now()}
+        if not (row := await self.sql_store.fetch_one("openai_files", policy=self.policy, where=where)):
+            raise ResourceNotFoundError(file_id, "File", "files.list()")
+        return row
+
+    async def _delete_file(self, file_id: str) -> None:
+        """Delete a file from S3 and the database."""
+        try:
+            self.client.delete_object(
+                Bucket=self._config.bucket_name,
+                Key=file_id,
+            )
+        except ClientError as e:
+            if e.response["Error"]["Code"] != "NoSuchKey":
+                raise RuntimeError(f"Failed to delete file from S3: {e}") from e
+
+        await self.sql_store.delete("openai_files", where={"id": file_id})
+
+    async def _delete_if_expired(self, file_id: str) -> None:
+        """If the file exists and is expired, delete it."""
+        if row := await self._get_file(file_id, return_expired=True):
+            if (expires_at := row.get("expires_at")) and expires_at <= self._now():
+                await self._delete_file(file_id)

     async def initialize(self) -> None:
         self._client = _create_s3_client(self._config)
         await _create_bucket_if_not_exists(self._client, self._config)

-        self._sql_store = sqlstore_impl(self._config.metadata_store)
+        self._sql_store = AuthorizedSqlStore(sqlstore_impl(self._config.metadata_store))
         await self._sql_store.create_table(
             "openai_files",
             {
@ -121,7 +187,7 @@ class S3FilesImpl(Files):
         return self._client

     @property
-    def sql_store(self) -> SqlStore:
+    def sql_store(self) -> AuthorizedSqlStore:
         assert self._sql_store is not None, "Provider not initialized"
         return self._sql_store

@ -129,27 +195,47 @@
         self,
         file: Annotated[UploadFile, File()],
         purpose: Annotated[OpenAIFilePurpose, Form()],
+        expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None,
+        expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None,
     ) -> OpenAIFileObject:
         file_id = f"file-{uuid.uuid4().hex}"

         filename = getattr(file, "filename", None) or "uploaded_file"

-        created_at = int(time.time())
-        expires_at = created_at + self._SILLY_EXPIRATION_OFFSET
+        created_at = self._now()
+
+        expires_after = None
+        if expires_after_anchor is not None or expires_after_seconds is not None:
+            # we use ExpiresAfter to validate input
+            expires_after = ExpiresAfter(
+                anchor=expires_after_anchor,  # type: ignore[arg-type]
+                seconds=expires_after_seconds,  # type: ignore[arg-type]
+            )
+
+        # the default is no expiration.
+        # to implement no expiration we set an expiration beyond the max.
+        # we'll hide this fact from users when returning the file object.
+        expires_at = created_at + ExpiresAfter.MAX * 42
+        # the default for BATCH files is 30 days, which happens to be the expiration max.
+        if purpose == OpenAIFilePurpose.BATCH:
+            expires_at = created_at + ExpiresAfter.MAX
+
+        if expires_after is not None:
+            expires_at = created_at + expires_after.seconds

         content = await file.read()
         file_size = len(content)

-        await self.sql_store.insert(
-            "openai_files",
-            {
-                "id": file_id,
-                "filename": filename,
-                "purpose": purpose.value,
-                "bytes": file_size,
-                "created_at": created_at,
-                "expires_at": expires_at,
-            },
-        )
+        entry: dict[str, Any] = {
+            "id": file_id,
+            "filename": filename,
+            "purpose": purpose.value,
+            "bytes": file_size,
+            "created_at": created_at,
+            "expires_at": expires_at,
+        }
+
+        await self.sql_store.insert("openai_files", entry)

         try:
             self.client.put_object(
@ -163,14 +249,7 @@
             raise RuntimeError(f"Failed to upload file to S3: {e}") from e

-        return OpenAIFileObject(
-            id=file_id,
-            filename=filename,
-            purpose=purpose,
-            bytes=file_size,
-            created_at=created_at,
-            expires_at=expires_at,
-        )
+        return _make_file_object(**entry)

     async def openai_list_files(
         self,
@ -183,29 +262,20 @@
         if not order:
             order = Order.desc

-        where_conditions = {}
+        where_conditions: dict[str, Any] = {"expires_at": {">": self._now()}}
         if purpose:
             where_conditions["purpose"] = purpose.value

         paginated_result = await self.sql_store.fetch_all(
             table="openai_files",
-            where=where_conditions if where_conditions else None,
+            policy=self.policy,
+            where=where_conditions,
             order_by=[("created_at", order.value)],
             cursor=("id", after) if after else None,
             limit=limit,
         )

-        files = [
-            OpenAIFileObject(
-                id=row["id"],
-                filename=row["filename"],
-                purpose=OpenAIFilePurpose(row["purpose"]),
-                bytes=row["bytes"],
-                created_at=row["created_at"],
-                expires_at=row["expires_at"],
-            )
-            for row in paginated_result.data
-        ]
+        files = [_make_file_object(**row) for row in paginated_result.data]

         return ListOpenAIFileResponse(
             data=files,
@ -216,41 +286,20 @@
         )

     async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject:
-        row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
-        if not row:
-            raise ResourceNotFoundError(file_id, "File", "files.list()")
-
-        return OpenAIFileObject(
-            id=row["id"],
-            filename=row["filename"],
-            purpose=OpenAIFilePurpose(row["purpose"]),
-            bytes=row["bytes"],
-            created_at=row["created_at"],
-            expires_at=row["expires_at"],
-        )
+        await self._delete_if_expired(file_id)
+        row = await self._get_file(file_id)
+        return _make_file_object(**row)

     async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse:
-        row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
-        if not row:
-            raise ResourceNotFoundError(file_id, "File", "files.list()")
-
-        try:
-            self.client.delete_object(
-                Bucket=self._config.bucket_name,
-                Key=row["id"],
-            )
-        except ClientError as e:
-            if e.response["Error"]["Code"] != "NoSuchKey":
-                raise RuntimeError(f"Failed to delete file from S3: {e}") from e
-
-        await self.sql_store.delete("openai_files", where={"id": file_id})
-
+        await self._delete_if_expired(file_id)
+        _ = await self._get_file(file_id)  # raises if not found
+        await self._delete_file(file_id)
         return OpenAIFileDeleteResponse(id=file_id, deleted=True)

     async def openai_retrieve_file_content(self, file_id: str) -> Response:
-        row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
-        if not row:
-            raise ResourceNotFoundError(file_id, "File", "files.list()")
+        await self._delete_if_expired(file_id)
+        row = await self._get_file(file_id)

         try:
             response = self.client.get_object(
@ -261,7 +310,7 @@ class S3FilesImpl(Files):
             content = response["Body"].read()
         except ClientError as e:
             if e.response["Error"]["Code"] == "NoSuchKey":
-                await self.sql_store.delete("openai_files", where={"id": file_id})
+                await self._delete_file(file_id)
                 raise ResourceNotFoundError(file_id, "File", "files.list()") from e
             raise RuntimeError(f"Failed to download file from S3: {e}") from e
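
Since the new `expires_after[anchor]` / `expires_after[seconds]` form aliases follow OpenAI's Files API encoding, an upload with an expiration can be sketched with plain multipart form fields. The endpoint path and port are assumptions for a local deployment:

```python
import requests

resp = requests.post(
    "http://localhost:8321/v1/openai/v1/files",  # hypothetical local endpoint
    files={"file": ("notes.txt", b"hello", "text/plain")},
    data={
        "purpose": "assistants",
        "expires_after[anchor]": "created_at",  # the anchor the spec supports
        "expires_after[seconds]": "3600",       # expire one hour after creation
    },
)
print(resp.status_code, resp.json())
```
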
@ -5,12 +5,13 @@
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
||||||
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
|
|
||||||
from .config import GeminiConfig
|
from .config import GeminiConfig
|
||||||
from .models import MODEL_ENTRIES
|
from .models import MODEL_ENTRIES
|
||||||
|
|
||||||
|
|
||||||
class GeminiInferenceAdapter(LiteLLMOpenAIMixin):
|
class GeminiInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||||
def __init__(self, config: GeminiConfig) -> None:
|
def __init__(self, config: GeminiConfig) -> None:
|
||||||
LiteLLMOpenAIMixin.__init__(
|
LiteLLMOpenAIMixin.__init__(
|
||||||
self,
|
self,
|
||||||
|
@ -21,6 +22,11 @@ class GeminiInferenceAdapter(LiteLLMOpenAIMixin):
|
||||||
)
|
)
|
||||||
self.config = config
|
self.config = config
|
||||||
|
|
||||||
|
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
||||||
|
|
||||||
|
def get_base_url(self):
|
||||||
|
return "https://generativelanguage.googleapis.com/v1beta/openai/"
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
async def initialize(self) -> None:
|
||||||
await super().initialize()
|
await super().initialize()
|
||||||
|
|
||||||
|
|
|
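
For context on the mixin wiring: `OpenAIMixin` builds its OpenAI-compatible client from the two hooks the adapter now supplies. A rough sketch of how such a mixin consumes them, simplified and not the real `OpenAIMixin` implementation:

```python
from openai import AsyncOpenAI


class SketchOpenAIMixin:
    """Simplified stand-in for OpenAIMixin: subclasses supply credentials and endpoint."""

    def get_api_key(self) -> str:
        raise NotImplementedError

    def get_base_url(self) -> str:
        raise NotImplementedError

    @property
    def client(self) -> AsyncOpenAI:
        # Any OpenAI-compatible endpoint works, e.g. Gemini's
        # https://generativelanguage.googleapis.com/v1beta/openai/
        return AsyncOpenAI(api_key=self.get_api_key(), base_url=self.get_base_url())
```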

@@ -4,30 +4,15 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from collections.abc import AsyncIterator
-from typing import Any
-
-from openai import AsyncOpenAI
-
-from llama_stack.apis.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAIChoiceDelta,
-    OpenAIChunkChoice,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-    OpenAISystemMessageParam,
-)
 from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
-from llama_stack.providers.utils.inference.openai_compat import (
-    prepare_openai_completion_params,
-)
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .models import MODEL_ENTRIES


-class GroqInferenceAdapter(LiteLLMOpenAIMixin):
+class GroqInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
     _config: GroqConfig

     def __init__(self, config: GroqConfig):
@@ -40,122 +25,14 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin):
         )
         self.config = config

+    # Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
+    get_api_key = LiteLLMOpenAIMixin.get_api_key
+
+    def get_base_url(self) -> str:
+        return f"{self.config.url}/openai/v1"
+
     async def initialize(self):
         await super().initialize()

     async def shutdown(self):
         await super().shutdown()

-    def _get_openai_client(self) -> AsyncOpenAI:
-        return AsyncOpenAI(
-            base_url=f"{self.config.url}/openai/v1",
-            api_key=self.get_api_key(),
-        )
-
-    async def openai_chat_completion(
-        self,
-        model: str,
-        messages: list[OpenAIMessageParam],
-        frequency_penalty: float | None = None,
-        function_call: str | dict[str, Any] | None = None,
-        functions: list[dict[str, Any]] | None = None,
-        logit_bias: dict[str, float] | None = None,
-        logprobs: bool | None = None,
-        max_completion_tokens: int | None = None,
-        max_tokens: int | None = None,
-        n: int | None = None,
-        parallel_tool_calls: bool | None = None,
-        presence_penalty: float | None = None,
-        response_format: OpenAIResponseFormatParam | None = None,
-        seed: int | None = None,
-        stop: str | list[str] | None = None,
-        stream: bool | None = None,
-        stream_options: dict[str, Any] | None = None,
-        temperature: float | None = None,
-        tool_choice: str | dict[str, Any] | None = None,
-        tools: list[dict[str, Any]] | None = None,
-        top_logprobs: int | None = None,
-        top_p: float | None = None,
-        user: str | None = None,
-    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        model_obj = await self.model_store.get_model(model)
-
-        # Groq does not support json_schema response format, so we need to convert it to json_object
-        if response_format and response_format.type == "json_schema":
-            response_format.type = "json_object"
-            schema = response_format.json_schema.get("schema", {})
-            response_format.json_schema = None
-            json_instructions = f"\nYour response should be a JSON object that matches the following schema: {schema}"
-            if messages and messages[0].role == "system":
-                messages[0].content = messages[0].content + json_instructions
-            else:
-                messages.insert(0, OpenAISystemMessageParam(content=json_instructions))
-
-        # Groq returns a 400 error if tools are provided but none are called
-        # So, set tool_choice to "required" to attempt to force a call
-        if tools and (not tool_choice or tool_choice == "auto"):
-            tool_choice = "required"
-
-        params = await prepare_openai_completion_params(
-            model=model_obj.provider_resource_id,
-            messages=messages,
-            frequency_penalty=frequency_penalty,
-            function_call=function_call,
-            functions=functions,
-            logit_bias=logit_bias,
-            logprobs=logprobs,
-            max_completion_tokens=max_completion_tokens,
-            max_tokens=max_tokens,
-            n=n,
-            parallel_tool_calls=parallel_tool_calls,
-            presence_penalty=presence_penalty,
-            response_format=response_format,
-            seed=seed,
-            stop=stop,
-            stream=stream,
-            stream_options=stream_options,
-            temperature=temperature,
-            tool_choice=tool_choice,
-            tools=tools,
-            top_logprobs=top_logprobs,
-            top_p=top_p,
-            user=user,
-        )
-
-        # Groq does not support streaming requests that set response_format
-        fake_stream = False
-        if stream and response_format:
-            params["stream"] = False
-            fake_stream = True
-
-        response = await self._get_openai_client().chat.completions.create(**params)
-
-        if fake_stream:
-            chunk_choices = []
-            for choice in response.choices:
-                delta = OpenAIChoiceDelta(
-                    content=choice.message.content,
-                    role=choice.message.role,
-                    tool_calls=choice.message.tool_calls,
-                )
-                chunk_choice = OpenAIChunkChoice(
-                    delta=delta,
-                    finish_reason=choice.finish_reason,
-                    index=choice.index,
-                    logprobs=None,
-                )
-                chunk_choices.append(chunk_choice)
-            chunk = OpenAIChatCompletionChunk(
-                id=response.id,
-                choices=chunk_choices,
-                object="chat.completion.chunk",
-                created=response.created,
-                model=response.model,
-            )
-
-            async def _fake_stream_generator():
-                yield chunk
-
-            return _fake_stream_generator()
-        else:
-            return response

@@ -41,10 +41,10 @@ client.initialize()

 ### Create Completion

-> Note on Completion API
->
-> The hosted NVIDIA Llama NIMs (e.g., `meta-llama/Llama-3.1-8B-Instruct`) with ```NVIDIA_BASE_URL="https://integrate.api.nvidia.com"``` does not support the ```completion``` method, while the locally deployed NIM does.
+The following example shows how to create a completion for an NVIDIA NIM.
+
+> [!NOTE]
+> The hosted NVIDIA Llama NIMs (for example ```meta-llama/Llama-3.1-8B-Instruct```) that have ```NVIDIA_BASE_URL="https://integrate.api.nvidia.com"``` do not support the ```completion``` method, while locally deployed NIMs do.

 ```python
 response = client.inference.completion(
@@ -60,6 +60,8 @@ print(f"Response: {response.content}")

 ### Create Chat Completion

+The following example shows how to create a chat completion for an NVIDIA NIM.
+
 ```python
 response = client.inference.chat_completion(
     model_id="meta-llama/Llama-3.1-8B-Instruct",
@@ -82,6 +84,9 @@ print(f"Response: {response.completion_message.content}")
 ```

 ### Tool Calling Example ###

+The following example shows how to do tool calling for an NVIDIA NIM.
+
 ```python
 from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition

@@ -117,6 +122,9 @@ if tool_response.completion_message.tool_calls:
 ```

 ### Structured Output Example

+The following example shows how to do structured output for an NVIDIA NIM.
+
 ```python
 from llama_stack.apis.inference import JsonSchemaResponseFormat, ResponseFormatType

@@ -149,8 +157,10 @@ print(f"Structured Response: {structured_response.completion_message.content}")
 ```

 ### Create Embeddings
-> Note on OpenAI embeddings compatibility
->
+
+The following example shows how to create embeddings for an NVIDIA NIM.
+
+> [!NOTE]
 > NVIDIA asymmetric embedding models (e.g., `nvidia/llama-3.2-nv-embedqa-1b-v2`) require an `input_type` parameter not present in the standard OpenAI embeddings API. The NVIDIA Inference Adapter automatically sets `input_type="query"` when using the OpenAI-compatible embeddings endpoint for NVIDIA. For passage embeddings, use the `embeddings` API with `task_type="document"`.

 ```python
@@ -161,3 +171,41 @@ response = client.inference.embeddings(
 )
 print(f"Embeddings: {response.embeddings}")
 ```
+
+### Vision Language Models Example
+
+The following example shows how to run vision inference by using an NVIDIA NIM.
+
+```python
+def load_image_as_base64(image_path):
+    with open(image_path, "rb") as image_file:
+        img_bytes = image_file.read()
+        return base64.b64encode(img_bytes).decode("utf-8")
+
+
+image_path = {path_to_the_image}
+demo_image_b64 = load_image_as_base64(image_path)
+
+vlm_response = client.inference.chat_completion(
+    model_id="nvidia/vila",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image",
+                    "image": {
+                        "data": demo_image_b64,
+                    },
+                },
+                {
+                    "type": "text",
+                    "text": "Please describe what you see in this image in detail.",
+                },
+            ],
+        }
+    ],
+)
+
+print(f"VLM Response: {vlm_response.completion_message.content}")
+```

@@ -55,6 +55,10 @@ MODEL_ENTRIES = [
         "meta/llama-3.3-70b-instruct",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
+    ProviderModelEntry(
+        provider_model_id="nvidia/vila",
+        model_type=ModelType.llm,
+    ),
     # NeMo Retriever Text Embedding models -
     #
     # https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html

@@ -118,10 +118,10 @@ class OllamaInferenceAdapter(

     async def initialize(self) -> None:
         logger.info(f"checking connectivity to Ollama at `{self.config.url}`...")
-        health_response = await self.health()
-        if health_response["status"] == HealthStatus.ERROR:
+        r = await self.health()
+        if r["status"] == HealthStatus.ERROR:
             logger.warning(
-                "Ollama Server is not running, make sure to start it using `ollama serve` in a separate terminal"
+                f"Ollama Server is not running (message: {r['message']}). Make sure to start it using `ollama serve` in a separate terminal"
             )

     async def should_refresh_models(self) -> bool:
@@ -156,7 +156,7 @@ class OllamaInferenceAdapter(
             ),
             Model(
                 identifier="nomic-embed-text",
-                provider_resource_id="nomic-embed-text",
+                provider_resource_id="nomic-embed-text:latest",
                 provider_id=provider_id,
                 metadata={
                     "embedding_dimension": 768,

@@ -4,13 +4,26 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import SambaNovaImplConfig
 from .models import MODEL_ENTRIES


-class SambaNovaInferenceAdapter(LiteLLMOpenAIMixin):
+class SambaNovaInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
+    """
+    SambaNova Inference Adapter for Llama Stack.
+
+    Note: The inheritance order is important here. OpenAIMixin must come before
+    LiteLLMOpenAIMixin to ensure that OpenAIMixin.check_model_availability()
+    is used instead of LiteLLMOpenAIMixin.check_model_availability().
+
+    - OpenAIMixin.check_model_availability() queries the /v1/models to check if a model exists
+    - LiteLLMOpenAIMixin.check_model_availability() checks the static registry within LiteLLM
+    """
+
     def __init__(self, config: SambaNovaImplConfig):
         self.config = config
         self.environment_available_models = []
@@ -24,3 +37,14 @@ class SambaNovaInferenceAdapter(LiteLLMOpenAIMixin):
             download_images=True,  # SambaNova requires base64 image encoding
             json_schema_strict=False,  # SambaNova doesn't support strict=True yet
         )
+
+    # Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
+    get_api_key = LiteLLMOpenAIMixin.get_api_key
+
+    def get_base_url(self) -> str:
+        """
+        Get the base URL for OpenAI mixin.
+
+        :return: The SambaNova base URL
+        """
+        return self.config.url
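
The docstring's point about inheritance order comes straight from Python's C3 linearization: with `class Adapter(OpenAIMixin, LiteLLMOpenAIMixin)`, attribute lookup walks the MRO left to right. A standalone illustration with dummy classes (not the real mixins):

```python
class LiteLLMStyle:
    def check_model_availability(self, model: str) -> bool:
        return False  # e.g. consult a static registry


class OpenAIStyle:
    def check_model_availability(self, model: str) -> bool:
        return True  # e.g. query the provider's /v1/models endpoint


class Adapter(OpenAIStyle, LiteLLMStyle):
    pass


# MRO: Adapter -> OpenAIStyle -> LiteLLMStyle -> object
print([c.__name__ for c in Adapter.__mro__])
print(Adapter().check_model_availability("some-model"))  # True: OpenAIStyle wins
```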

@@ -7,8 +7,8 @@
 from collections.abc import AsyncGenerator, AsyncIterator
 from typing import Any

-from ibm_watson_machine_learning.foundation_models import Model
-from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
+from ibm_watsonx_ai.foundation_models import Model
+from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
 from openai import AsyncOpenAI

 from llama_stack.apis.common.content_types import InterleavedContent, InterleavedContentItem

@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import heapq
 from typing import Any

 import psycopg2
@@ -23,6 +24,9 @@ from llama_stack.apis.vector_io import (
 )
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
@@ -31,6 +35,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorDBWithIndex,
 )
+from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name

 from .config import PGVectorVectorIOConfig

@@ -72,25 +77,63 @@ def load_models(cur, cls):


 class PGVectorIndex(EmbeddingIndex):
-    def __init__(self, vector_db: VectorDB, dimension: int, conn, kvstore: KVStore | None = None):
-        self.conn = conn
-        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-            # Sanitize the table name by replacing hyphens with underscores
-            # SQL doesn't allow hyphens in table names, and vector_db.identifier may contain hyphens
-            # when created with patterns like "test-vector-db-{uuid4()}"
-            sanitized_identifier = vector_db.identifier.replace("-", "_")
-            self.table_name = f"vector_store_{sanitized_identifier}"
-            self.kvstore = kvstore
-
-            cur.execute(
-                f"""
-                CREATE TABLE IF NOT EXISTS {self.table_name} (
-                    id TEXT PRIMARY KEY,
-                    document JSONB,
-                    embedding vector({dimension})
-                )
-            """
-            )
+    # reference: https://github.com/pgvector/pgvector?tab=readme-ov-file#querying
+    PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION: dict[str, str] = {
+        "L2": "<->",
+        "L1": "<+>",
+        "COSINE": "<=>",
+        "INNER_PRODUCT": "<#>",
+        "HAMMING": "<~>",
+        "JACCARD": "<%>",
+    }
+
+    def __init__(
+        self,
+        vector_db: VectorDB,
+        dimension: int,
+        conn: psycopg2.extensions.connection,
+        kvstore: KVStore | None = None,
+        distance_metric: str = "COSINE",
+    ):
+        self.vector_db = vector_db
+        self.dimension = dimension
+        self.conn = conn
+        self.kvstore = kvstore
+        self.check_distance_metric_availability(distance_metric)
+        self.distance_metric = distance_metric
+        self.table_name = None
+
+    async def initialize(self) -> None:
+        try:
+            with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+                # Sanitize the table name by replacing hyphens with underscores
+                # SQL doesn't allow hyphens in table names, and vector_db.identifier may contain hyphens
+                # when created with patterns like "test-vector-db-{uuid4()}"
+                sanitized_identifier = sanitize_collection_name(self.vector_db.identifier)
+                self.table_name = f"vs_{sanitized_identifier}"
+
+                cur.execute(
+                    f"""
+                    CREATE TABLE IF NOT EXISTS {self.table_name} (
+                        id TEXT PRIMARY KEY,
+                        document JSONB,
+                        embedding vector({self.dimension}),
+                        content_text TEXT,
+                        tokenized_content TSVECTOR
+                    )
+                """
+                )
+
+                # Create GIN index for full-text search performance
+                cur.execute(
+                    f"""
+                    CREATE INDEX IF NOT EXISTS {self.table_name}_content_gin_idx
+                    ON {self.table_name} USING GIN(tokenized_content)
+                """
+                )
+        except Exception as e:
+            log.exception(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}")
+            raise RuntimeError(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}") from e
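
The operator table maps directly onto pgvector's query syntax: with the default `COSINE` metric, queries order by the `<=>` operator. A small sketch of the SQL that `query_vector` ends up issuing (table name is illustrative; the operator is interpolated while the embedding and limit remain bound parameters):

```python
distance_metric_ops = {"L2": "<->", "COSINE": "<=>", "INNER_PRODUCT": "<#>"}


def build_vector_query(table_name: str, distance_metric: str = "COSINE") -> str:
    # Mirrors the f-string in query_vector below: only the operator is
    # interpolated; the embedding and LIMIT stay as %s placeholders.
    op = distance_metric_ops[distance_metric]
    return (
        f"SELECT document, embedding {op} %s::vector AS distance "
        f"FROM {table_name} ORDER BY distance LIMIT %s"
    )


print(build_vector_query("vs_my_store"))
```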
     async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
         assert len(chunks) == len(embeddings), (
@@ -99,29 +142,49 @@ class PGVectorIndex(EmbeddingIndex):

         values = []
         for i, chunk in enumerate(chunks):
+            content_text = interleaved_content_as_str(chunk.content)
             values.append(
                 (
                     f"{chunk.chunk_id}",
                     Json(chunk.model_dump()),
                     embeddings[i].tolist(),
+                    content_text,
+                    content_text,  # Pass content_text twice - once for content_text column, once for to_tsvector function. Eg. to_tsvector(content_text) = tokenized_content
                 )
             )

         query = sql.SQL(
             f"""
-            INSERT INTO {self.table_name} (id, document, embedding)
+            INSERT INTO {self.table_name} (id, document, embedding, content_text, tokenized_content)
             VALUES %s
-            ON CONFLICT (id) DO UPDATE SET embedding = EXCLUDED.embedding, document = EXCLUDED.document
+            ON CONFLICT (id) DO UPDATE SET
+                embedding = EXCLUDED.embedding,
+                document = EXCLUDED.document,
+                content_text = EXCLUDED.content_text,
+                tokenized_content = EXCLUDED.tokenized_content
         """
         )
         with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-            execute_values(cur, query, values, template="(%s, %s, %s::vector)")
+            execute_values(cur, query, values, template="(%s, %s, %s::vector, %s, to_tsvector('english', %s))")

     async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
+        """
+        Performs vector similarity search using PostgreSQL's search function. Default distance metric is COSINE.
+
+        Args:
+            embedding: The query embedding vector
+            k: Number of results to return
+            score_threshold: Minimum similarity score threshold
+
+        Returns:
+            QueryChunksResponse with combined results
+        """
+        pgvector_search_function = self.get_pgvector_search_function()
+
         with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
             cur.execute(
                 f"""
-                SELECT document, embedding <-> %s::vector AS distance
+                SELECT document, embedding {pgvector_search_function} %s::vector AS distance
                 FROM {self.table_name}
                 ORDER BY distance
                 LIMIT %s
@@ -147,7 +210,40 @@ class PGVectorIndex(EmbeddingIndex):
         k: int,
         score_threshold: float,
     ) -> QueryChunksResponse:
-        raise NotImplementedError("Keyword search is not supported in PGVector")
+        """
+        Performs keyword-based search using PostgreSQL's full-text search with ts_rank scoring.
+
+        Args:
+            query_string: The text query for keyword search
+            k: Number of results to return
+            score_threshold: Minimum similarity score threshold
+
+        Returns:
+            QueryChunksResponse with combined results
+        """
+        with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+            # Use plainto_tsquery to handle user input safely and ts_rank for relevance scoring
+            cur.execute(
+                f"""
+                SELECT document, ts_rank(tokenized_content, plainto_tsquery('english', %s)) AS score
+                FROM {self.table_name}
+                WHERE tokenized_content @@ plainto_tsquery('english', %s)
+                ORDER BY score DESC
+                LIMIT %s
+                """,
+                (query_string, query_string, k),
+            )
+            results = cur.fetchall()
+
+            chunks = []
+            scores = []
+            for doc, score in results:
+                if score < score_threshold:
+                    continue
+                chunks.append(Chunk(**doc))
+                scores.append(float(score))
+
+            return QueryChunksResponse(chunks=chunks, scores=scores)

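
As a quick illustration of the Postgres primitives used above: `to_tsvector` normalizes the stored text, `plainto_tsquery` safely parses the user's query, and `ts_rank` scores the match. A minimal psycopg2 session demonstrating the same pattern against a throwaway table (assumes a reachable Postgres instance with the connection string shown):

```python
import psycopg2

# Purely illustrative; adjust the DSN for your environment.
conn = psycopg2.connect("dbname=test user=postgres")
with conn.cursor() as cur:
    cur.execute("CREATE TEMP TABLE demo (doc TEXT, tokens TSVECTOR)")
    cur.execute(
        "INSERT INTO demo VALUES (%s, to_tsvector('english', %s))",
        ("the quick brown fox", "the quick brown fox"),
    )
    cur.execute(
        """
        SELECT doc, ts_rank(tokens, plainto_tsquery('english', %s)) AS score
        FROM demo
        WHERE tokens @@ plainto_tsquery('english', %s)
        ORDER BY score DESC
        """,
        ("quick fox", "quick fox"),
    )
    print(cur.fetchall())
```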
     async def query_hybrid(
         self,
@@ -158,7 +254,59 @@ class PGVectorIndex(EmbeddingIndex):
         reranker_type: str,
         reranker_params: dict[str, Any] | None = None,
     ) -> QueryChunksResponse:
-        raise NotImplementedError("Hybrid search is not supported in PGVector")
+        """
+        Hybrid search combining vector similarity and keyword search using configurable reranking.
+
+        Args:
+            embedding: The query embedding vector
+            query_string: The text query for keyword search
+            k: Number of results to return
+            score_threshold: Minimum similarity score threshold
+            reranker_type: Type of reranker to use ("rrf" or "weighted")
+            reranker_params: Parameters for the reranker
+
+        Returns:
+            QueryChunksResponse with combined results
+        """
+        if reranker_params is None:
+            reranker_params = {}
+
+        # Get results from both search methods
+        vector_response = await self.query_vector(embedding, k, score_threshold)
+        keyword_response = await self.query_keyword(query_string, k, score_threshold)
+
+        # Convert responses to score dictionaries using chunk_id
+        vector_scores = {
+            chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
+        }
+        keyword_scores = {
+            chunk.chunk_id: score
+            for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
+        }
+
+        # Combine scores using the reranking utility
+        combined_scores = WeightedInMemoryAggregator.combine_search_results(
+            vector_scores, keyword_scores, reranker_type, reranker_params
+        )
+
+        # Efficient top-k selection because it only tracks the k best candidates it's seen so far
+        top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1])
+
+        # Filter by score threshold
+        filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]
+
+        # Create a map of chunk_id to chunk for both responses
+        chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}
+
+        # Use the map to look up chunks by their IDs
+        chunks = []
+        scores = []
+        for doc_id, score in filtered_items:
+            if doc_id in chunk_map:
+                chunks.append(chunk_map[doc_id])
+                scores.append(score)
+
+        return QueryChunksResponse(chunks=chunks, scores=scores)

     async def delete(self):
         with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
@@ -170,6 +318,25 @@ class PGVectorIndex(EmbeddingIndex):
         with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
             cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s)", (chunk_ids,))

+    def get_pgvector_search_function(self) -> str:
+        return self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION[self.distance_metric]
+
+    def check_distance_metric_availability(self, distance_metric: str) -> None:
+        """Check if the distance metric is supported by PGVector.
+
+        Args:
+            distance_metric: The distance metric to check
+
+        Raises:
+            ValueError: If the distance metric is not supported
+        """
+        if distance_metric not in self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION:
+            supported_metrics = list(self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION.keys())
+            raise ValueError(
+                f"Distance metric '{distance_metric}' is not supported by PGVector. "
+                f"Supported metrics are: {', '.join(supported_metrics)}"
+            )
+

 class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
     def __init__(
@@ -185,8 +352,8 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
         self.files_api = files_api
         self.kvstore: KVStore | None = None
         self.vector_db_store = None
-        self.openai_vector_store: dict[str, dict[str, Any]] = {}
-        self.metadatadata_collection_name = "openai_vector_stores_metadata"
+        self.openai_vector_stores: dict[str, dict[str, Any]] = {}
+        self.metadata_collection_name = "openai_vector_stores_metadata"

     async def initialize(self) -> None:
         log.info(f"Initializing PGVector memory adapter with config: {self.config}")
@@ -233,9 +400,13 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
         upsert_models(self.conn, [(vector_db.identifier, vector_db)])

         # Create and cache the PGVector index table for the vector DB
+        pgvector_index = PGVectorIndex(
+            vector_db=vector_db, dimension=vector_db.embedding_dimension, conn=self.conn, kvstore=self.kvstore
+        )
+        await pgvector_index.initialize()
         index = VectorDBWithIndex(
             vector_db,
-            index=PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn, kvstore=self.kvstore),
+            index=pgvector_index,
             inference_api=self.inference_api,
         )
         self.cache[vector_db.identifier] = index
@@ -272,8 +443,15 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
         if vector_db_id in self.cache:
             return self.cache[vector_db_id]

+        if self.vector_db_store is None:
+            raise VectorStoreNotFoundError(vector_db_id)
+
         vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
+        if not vector_db:
+            raise VectorStoreNotFoundError(vector_db_id)
+
         index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn)
+        await index.initialize()
         self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api)
         return self.cache[vector_db_id]

@@ -4,53 +4,55 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import os
+
 from pydantic import BaseModel, Field


 class BedrockBaseConfig(BaseModel):
     aws_access_key_id: str | None = Field(
-        default=None,
+        default_factory=lambda: os.getenv("AWS_ACCESS_KEY_ID"),
         description="The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID",
     )
     aws_secret_access_key: str | None = Field(
-        default=None,
+        default_factory=lambda: os.getenv("AWS_SECRET_ACCESS_KEY"),
         description="The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY",
     )
     aws_session_token: str | None = Field(
-        default=None,
+        default_factory=lambda: os.getenv("AWS_SESSION_TOKEN"),
         description="The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN",
     )
     region_name: str | None = Field(
-        default=None,
+        default_factory=lambda: os.getenv("AWS_DEFAULT_REGION"),
         description="The default AWS Region to use, for example, us-west-1 or us-west-2."
        "Default use environment variable: AWS_DEFAULT_REGION",
     )
     profile_name: str | None = Field(
-        default=None,
+        default_factory=lambda: os.getenv("AWS_PROFILE"),
         description="The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE",
     )
     total_max_attempts: int | None = Field(
-        default=None,
+        default_factory=lambda: int(val) if (val := os.getenv("AWS_MAX_ATTEMPTS")) else None,
         description="An integer representing the maximum number of attempts that will be made for a single request, "
         "including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS",
     )
     retry_mode: str | None = Field(
-        default=None,
+        default_factory=lambda: os.getenv("AWS_RETRY_MODE"),
         description="A string representing the type of retries Boto3 will perform."
         "Default use environment variable: AWS_RETRY_MODE",
     )
     connect_timeout: float | None = Field(
-        default=60,
+        default_factory=lambda: float(os.getenv("AWS_CONNECT_TIMEOUT", "60")),
         description="The time in seconds till a timeout exception is thrown when attempting to make a connection. "
         "The default is 60 seconds.",
     )
     read_timeout: float | None = Field(
-        default=60,
+        default_factory=lambda: float(os.getenv("AWS_READ_TIMEOUT", "60")),
         description="The time in seconds till a timeout exception is thrown when attempting to read from a connection."
         "The default is 60 seconds.",
     )
     session_ttl: int | None = Field(
-        default=3600,
+        default_factory=lambda: int(os.getenv("AWS_SESSION_TTL", "3600")),
         description="The time in seconds till a session expires. The default is 3600 seconds (1 hour).",
     )
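
The switch from `default=None` to `default_factory` matters because a plain `default` is evaluated once at class-definition time, while a `default_factory` runs at each instantiation, so later changes to the environment are picked up. A small self-contained demonstration of the difference:

```python
import os

from pydantic import BaseModel, Field


class Config(BaseModel):
    # Read the environment at instantiation time, not at import time.
    region: str | None = Field(default_factory=lambda: os.getenv("AWS_DEFAULT_REGION"))


os.environ["AWS_DEFAULT_REGION"] = "us-west-2"
print(Config().region)  # "us-west-2" - reflects the current environment

del os.environ["AWS_DEFAULT_REGION"]
print(Config().region)  # None
```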

@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import asyncio
 import base64
 import struct
 from typing import TYPE_CHECKING
@@ -43,9 +44,11 @@ class SentenceTransformerEmbeddingMixin:
         task_type: EmbeddingTaskType | None = None,
     ) -> EmbeddingsResponse:
         model = await self.model_store.get_model(model_id)
-        embedding_model = self._load_sentence_transformer_model(model.provider_resource_id)
-        embeddings = embedding_model.encode(
-            [interleaved_content_as_str(content) for content in contents], show_progress_bar=False
+        embedding_model = await self._load_sentence_transformer_model(model.provider_resource_id)
+        embeddings = await asyncio.to_thread(
+            embedding_model.encode,
+            [interleaved_content_as_str(content) for content in contents],
+            show_progress_bar=False,
         )
         return EmbeddingsResponse(embeddings=embeddings)

@@ -64,8 +67,8 @@ class SentenceTransformerEmbeddingMixin:

         # Get the model and generate embeddings
         model_obj = await self.model_store.get_model(model)
-        embedding_model = self._load_sentence_transformer_model(model_obj.provider_resource_id)
-        embeddings = embedding_model.encode(input_list, show_progress_bar=False)
+        embedding_model = await self._load_sentence_transformer_model(model_obj.provider_resource_id)
+        embeddings = await asyncio.to_thread(embedding_model.encode, input_list, show_progress_bar=False)

         # Convert embeddings to the requested format
         data = []
@@ -93,7 +96,7 @@ class SentenceTransformerEmbeddingMixin:
             usage=usage,
         )

-    def _load_sentence_transformer_model(self, model: str) -> "SentenceTransformer":
+    async def _load_sentence_transformer_model(self, model: str) -> "SentenceTransformer":
         global EMBEDDING_MODELS

         loaded_model = EMBEDDING_MODELS.get(model)
@@ -101,8 +104,12 @@ class SentenceTransformerEmbeddingMixin:
             return loaded_model

         log.info(f"Loading sentence transformer for {model}...")
-        from sentence_transformers import SentenceTransformer

-        loaded_model = SentenceTransformer(model)
+        def _load_model():
+            from sentence_transformers import SentenceTransformer
+
+            return SentenceTransformer(model)
+
+        loaded_model = await asyncio.to_thread(_load_model)
         EMBEDDING_MODELS[model] = loaded_model
         return loaded_model
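
`asyncio.to_thread` runs a blocking callable in the default thread-pool executor, so the event loop stays responsive while the model loads or encodes. A minimal sketch of the pattern, with `time.sleep` standing in for the blocking SentenceTransformer work:

```python
import asyncio
import time


def blocking_encode(texts: list[str]) -> list[str]:
    time.sleep(1)  # stand-in for CPU-bound embedding work
    return [t.upper() for t in texts]


async def main() -> None:
    # The event loop can keep serving other tasks during the sleep.
    result = await asyncio.to_thread(blocking_encode, ["hello", "world"])
    print(result)


asyncio.run(main())
```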

@@ -294,12 +294,12 @@ class VectorDBWithIndex:
             _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension)

         if chunks_to_embed:
-            resp = await self.inference_api.embeddings(
+            resp = await self.inference_api.openai_embeddings(
                 self.vector_db.embedding_model,
                 [c.content for c in chunks_to_embed],
             )
-            for c, embedding in zip(chunks_to_embed, resp.embeddings, strict=False):
-                c.embedding = embedding
+            for c, data in zip(chunks_to_embed, resp.data, strict=False):
+                c.embedding = data.embedding

         embeddings = np.array([c.embedding for c in chunks], dtype=np.float32)
         await self.index.add_chunks(chunks, embeddings)
@@ -334,8 +334,8 @@ class VectorDBWithIndex:
         if mode == "keyword":
             return await self.index.query_keyword(query_string, k, score_threshold)

-        embeddings_response = await self.inference_api.embeddings(self.vector_db.embedding_model, [query_string])
-        query_vector = np.array(embeddings_response.embeddings[0], dtype=np.float32)
+        embeddings_response = await self.inference_api.openai_embeddings(self.vector_db.embedding_model, [query_string])
+        query_vector = np.array(embeddings_response.data[0].embedding, dtype=np.float32)
         if mode == "hybrid":
             return await self.index.query_hybrid(
                 query_vector, query_string, k, score_threshold, reranker_type, reranker_params
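
The call-site change reflects a response-shape difference: the legacy `embeddings` API returned a bare `embeddings` list, while the OpenAI-style response nests each vector under `data[i].embedding`. Roughly:

```python
# Legacy response shape (llama-stack embeddings API):
#   resp.embeddings[0]      -> [0.1, 0.2, ...]
# OpenAI-compatible response shape:
#   resp.data[0].embedding  -> [0.1, 0.2, ...]


def first_vector(resp) -> list[float]:
    # Works for the OpenAI-style response object used after this change.
    return resp.data[0].embedding
```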

@@ -23,6 +23,7 @@ from sqlalchemy import (
 )
 from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
 from sqlalchemy.ext.asyncio.engine import AsyncEngine
+from sqlalchemy.sql.elements import ColumnElement

 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.log import get_logger
@@ -43,6 +44,30 @@ TYPE_MAPPING: dict[ColumnType, Any] = {
 }


+def _build_where_expr(column: ColumnElement, value: Any) -> ColumnElement:
+    """Return a SQLAlchemy expression for a where condition.
+
+    `value` may be a simple scalar (equality) or a mapping like {">": 123}.
+    The returned expression is a SQLAlchemy ColumnElement usable in query.where(...).
+    """
+    if isinstance(value, Mapping):
+        if len(value) != 1:
+            raise ValueError(f"Operator mapping must have a single operator, got: {value}")
+        op, operand = next(iter(value.items()))
+        if op == "==" or op == "=":
+            return column == operand
+        if op == ">":
+            return column > operand
+        if op == "<":
+            return column < operand
+        if op == ">=":
+            return column >= operand
+        if op == "<=":
+            return column <= operand
+        raise ValueError(f"Unsupported operator '{op}' in where mapping")
+    return column == value
+
+
 class SqlAlchemySqlStoreImpl(SqlStore):
     def __init__(self, config: SqlAlchemySqlStoreConfig):
         self.config = config
@@ -111,7 +136,7 @@ class SqlAlchemySqlStoreImpl(SqlStore):

         if where:
             for key, value in where.items():
-                query = query.where(table_obj.c[key] == value)
+                query = query.where(_build_where_expr(table_obj.c[key], value))

         if where_sql:
             query = query.where(text(where_sql))
@@ -222,7 +247,7 @@ class SqlAlchemySqlStoreImpl(SqlStore):
         async with self.async_session() as session:
             stmt = self.metadata.tables[table].update()
             for key, value in where.items():
-                stmt = stmt.where(self.metadata.tables[table].c[key] == value)
+                stmt = stmt.where(_build_where_expr(self.metadata.tables[table].c[key], value))
             await session.execute(stmt, data)
             await session.commit()

@@ -233,7 +258,7 @@ class SqlAlchemySqlStoreImpl(SqlStore):
         async with self.async_session() as session:
             stmt = self.metadata.tables[table].delete()
             for key, value in where.items():
-                stmt = stmt.where(self.metadata.tables[table].c[key] == value)
+                stmt = stmt.where(_build_where_expr(self.metadata.tables[table].c[key], value))
             await session.execute(stmt)
             await session.commit()
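
With the operator mapping in place, callers can express range conditions as plain dicts. A hedged usage sketch against a SQLAlchemy table, assuming `_build_where_expr` from the hunk above is in scope (the table definition here is illustrative):

```python
import time

from sqlalchemy import Column, Integer, MetaData, String, Table, select

metadata = MetaData()
files = Table(
    "openai_files",
    metadata,
    Column("id", String, primary_key=True),
    Column("expires_at", Integer),
)

# Scalar -> equality; mapping -> comparison operator.
eq_expr = _build_where_expr(files.c.id, "file-123")
lt_expr = _build_where_expr(files.c.expires_at, {"<": int(time.time())})

query = select(files).where(eq_expr).where(lt_expr)
print(query)  # SELECT ... WHERE id = :id_1 AND expires_at < :expires_at_1
```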

@@ -67,6 +67,38 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
                 raise AuthenticationRequiredError(exc) from exc
             if i == len(connection_strategies) - 1:
                 raise
+        except* httpx.ConnectError as eg:
+            # Connection refused, server down, network unreachable
+            if i == len(connection_strategies) - 1:
+                error_msg = f"Failed to connect to MCP server at {endpoint}: Connection refused"
+                logger.error(f"MCP connection error: {error_msg}")
+                raise ConnectionError(error_msg) from eg
+            else:
+                logger.warning(
+                    f"failed to connect to MCP server at {endpoint} via {strategy.name}, falling back to {connection_strategies[i + 1].name}"
+                )
+        except* httpx.TimeoutException as eg:
+            # Request timeout, server too slow
+            if i == len(connection_strategies) - 1:
+                error_msg = f"MCP server at {endpoint} timed out"
+                logger.error(f"MCP timeout error: {error_msg}")
+                raise TimeoutError(error_msg) from eg
+            else:
+                logger.warning(
+                    f"MCP server at {endpoint} timed out via {strategy.name}, falling back to {connection_strategies[i + 1].name}"
+                )
+        except* httpx.RequestError as eg:
+            # DNS resolution failures, network errors, invalid URLs
+            if i == len(connection_strategies) - 1:
+                # Get the first exception's message for the error string
+                exc_msg = str(eg.exceptions[0]) if eg.exceptions else "Unknown error"
+                error_msg = f"Network error connecting to MCP server at {endpoint}: {exc_msg}"
+                logger.error(f"MCP network error: {error_msg}")
+                raise ConnectionError(error_msg) from eg
+            else:
+                logger.warning(
+                    f"network error connecting to MCP server at {endpoint} via {strategy.name}, falling back to {connection_strategies[i + 1].name}"
+                )
         except* McpError:
             if i < len(connection_strategies) - 1:
                 logger.warning(
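
The `except*` syntax (Python 3.11+) matches exceptions inside an `ExceptionGroup`, which is what structured-concurrency task groups raise when connection attempts fail; each handler receives the matching sub-group as `eg`. A standalone illustration:

```python
# Requires Python 3.11+ for except* / ExceptionGroup.
def run() -> None:
    try:
        raise ExceptionGroup(
            "connection attempt failed",
            [ConnectionError("refused"), TimeoutError("too slow")],
        )
    except* ConnectionError as eg:
        # Receives only the ConnectionError slice of the group.
        print("connection errors:", [str(e) for e in eg.exceptions])
    except* TimeoutError as eg:
        # Both handlers can fire for the same group, each on its slice.
        print("timeouts:", [str(e) for e in eg.exceptions])


run()
```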

@@ -37,3 +37,122 @@ def sanitize_collection_name(name: str, weaviate_format=False) -> str:
     else:
         s = proper_case(re.sub(r"[^a-zA-Z0-9]", "", name))
     return s
+
+
+class WeightedInMemoryAggregator:
+    @staticmethod
+    def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
+        """
+        Normalize scores to 0-1 range using min-max normalization.
+
+        Args:
+            scores: dictionary of scores with document IDs as keys and scores as values
+
+        Returns:
+            Normalized scores with document IDs as keys and normalized scores as values
+        """
+        if not scores:
+            return {}
+        min_score, max_score = min(scores.values()), max(scores.values())
+        score_range = max_score - min_score
+        if score_range > 0:
+            return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()}
+        return dict.fromkeys(scores, 1.0)
+
+    @staticmethod
+    def weighted_rerank(
+        vector_scores: dict[str, float],
+        keyword_scores: dict[str, float],
+        alpha: float = 0.5,
+    ) -> dict[str, float]:
+        """
+        Rerank via weighted average of scores.
+
+        Args:
+            vector_scores: scores from vector search
+            keyword_scores: scores from keyword search
+            alpha: weight factor between 0 and 1 (default: 0.5)
+                   0 = keyword only, 1 = vector only, 0.5 = equal weight
+
+        Returns:
+            All unique document IDs with weighted combined scores
+        """
+        all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
+        normalized_vector_scores = WeightedInMemoryAggregator._normalize_scores(vector_scores)
+        normalized_keyword_scores = WeightedInMemoryAggregator._normalize_scores(keyword_scores)
+
+        # Weighted formula: score = (1-alpha) * keyword_score + alpha * vector_score
+        # alpha=0 means keyword only, alpha=1 means vector only
+        return {
+            doc_id: ((1 - alpha) * normalized_keyword_scores.get(doc_id, 0.0))
+            + (alpha * normalized_vector_scores.get(doc_id, 0.0))
+            for doc_id in all_ids
+        }
+
+    @staticmethod
+    def rrf_rerank(
+        vector_scores: dict[str, float],
+        keyword_scores: dict[str, float],
+        impact_factor: float = 60.0,
+    ) -> dict[str, float]:
+        """
+        Rerank via Reciprocal Rank Fusion.
+
+        Args:
+            vector_scores: scores from vector search
+            keyword_scores: scores from keyword search
+            impact_factor: impact factor for RRF (default: 60.0)
+
+        Returns:
+            All unique document IDs with RRF combined scores
+        """
+        # Convert scores to ranks
+        vector_ranks = {
+            doc_id: i + 1
+            for i, (doc_id, _) in enumerate(sorted(vector_scores.items(), key=lambda x: x[1], reverse=True))
+        }
+        keyword_ranks = {
+            doc_id: i + 1
+            for i, (doc_id, _) in enumerate(sorted(keyword_scores.items(), key=lambda x: x[1], reverse=True))
+        }
+
+        all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
+        rrf_scores = {}
+        for doc_id in all_ids:
+            vector_rank = vector_ranks.get(doc_id, float("inf"))
+            keyword_rank = keyword_ranks.get(doc_id, float("inf"))
+
+            # RRF formula: score = 1/(k + r) where k is impact_factor (default: 60.0) and r is the rank
+            rrf_scores[doc_id] = (1.0 / (impact_factor + vector_rank)) + (1.0 / (impact_factor + keyword_rank))
+        return rrf_scores
+
+    @staticmethod
+    def combine_search_results(
+        vector_scores: dict[str, float],
+        keyword_scores: dict[str, float],
+        reranker_type: str = "rrf",
+        reranker_params: dict[str, float] | None = None,
+    ) -> dict[str, float]:
+        """
+        Combine vector and keyword search results using specified reranking strategy.
+
+        Args:
+            vector_scores: scores from vector search
+            keyword_scores: scores from keyword search
+            reranker_type: type of reranker to use (default: RERANKER_TYPE_RRF)
+            reranker_params: parameters for the reranker
+
+        Returns:
+            All unique document IDs with combined scores
+        """
+        if reranker_params is None:
+            reranker_params = {}
+
+        if reranker_type == "weighted":
+            alpha = reranker_params.get("alpha", 0.5)
+            return WeightedInMemoryAggregator.weighted_rerank(vector_scores, keyword_scores, alpha)
+        else:
+            # Default to RRF for None, RRF, or any unknown types
+            impact_factor = reranker_params.get("impact_factor", 60.0)
+            return WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores, impact_factor)
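
To make the RRF formula concrete: a document ranked 1st by vector search and 3rd by keyword search scores `1/(60+1) + 1/(60+3) ≈ 0.0323`, while a document found by only one method gets an infinite rank on the other side, so that term vanishes. A small worked example using the class from the hunk above:

```python
vector_scores = {"doc-a": 0.92, "doc-b": 0.55}
keyword_scores = {"doc-a": 1.4, "doc-c": 0.9}

rrf = WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores)
# doc-a: 1/(60+1) + 1/(60+1) ~= 0.0328  (ranked #1 by both searches)
# doc-b: 1/(60+2) + 0                    (keyword rank is inf -> term vanishes)
print(sorted(rrf.items(), key=lambda x: x[1], reverse=True))

weighted = WeightedInMemoryAggregator.weighted_rerank(vector_scores, keyword_scores, alpha=0.5)
print(weighted["doc-a"])  # 1.0: doc-a is max-normalized to 1 on both sides
```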
@ -30,6 +30,9 @@ from openai.types.completion_choice import CompletionChoice
|
||||||
CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
|
CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
|
||||||
CompletionChoice.model_rebuild()
|
CompletionChoice.model_rebuild()
|
||||||
|
|
||||||
|
REPO_ROOT = Path(__file__).parent.parent.parent
|
||||||
|
DEFAULT_STORAGE_DIR = REPO_ROOT / "tests/integration/recordings"
|
||||||
|
|
||||||
|
|
||||||
class InferenceMode(StrEnum):
|
class InferenceMode(StrEnum):
|
||||||
LIVE = "live"
|
LIVE = "live"
|
||||||
|
@ -51,7 +54,7 @@ def normalize_request(method: str, url: str, headers: dict[str, Any], body: dict
|
||||||
|
|
||||||
|
|
||||||
def get_inference_mode() -> InferenceMode:
|
def get_inference_mode() -> InferenceMode:
|
||||||
return InferenceMode(os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "live").lower())
|
return InferenceMode(os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "replay").lower())
|
||||||
|
|
||||||
|
|
||||||
def setup_inference_recording():
|
def setup_inference_recording():
|
||||||
|
@ -60,28 +63,18 @@ def setup_inference_recording():
|
||||||
to increase their reliability and reduce reliance on expensive, external services.
|
to increase their reliability and reduce reliance on expensive, external services.
|
||||||
|
|
||||||
Currently, this is only supported for OpenAI and Ollama clients. These should cover the vast majority of use cases.
|
Currently, this is only supported for OpenAI and Ollama clients. These should cover the vast majority of use cases.
|
||||||
Calls to the /models endpoint are not currently trapped. We probably need to add support for this.
|
|
||||||
|
|
||||||
Two environment variables are required:
|
Two environment variables are supported:
|
||||||
- LLAMA_STACK_TEST_INFERENCE_MODE: The mode to run in. Must be 'live', 'record', or 'replay'.
|
- LLAMA_STACK_TEST_INFERENCE_MODE: The mode to run in. Must be 'live', 'record', or 'replay'. Default is 'replay'.
|
||||||
- LLAMA_STACK_TEST_RECORDING_DIR: The directory to store the recordings in.
|
- LLAMA_STACK_TEST_RECORDING_DIR: The directory to store the recordings in. Default is 'tests/integration/recordings'.
|
||||||
|
|
||||||
The recordings are stored in a SQLite database and a JSON file for each request. The SQLite database is used to
|
The recordings are stored as JSON files.
|
||||||
quickly find the correct recording for a given request. The JSON files are used to store the request and response
|
|
||||||
bodies.
|
|
||||||
"""
|
"""
|
||||||
mode = get_inference_mode()
|
mode = get_inference_mode()
|
||||||
|
|
||||||
if mode not in InferenceMode:
|
|
||||||
raise ValueError(f"Invalid LLAMA_STACK_TEST_INFERENCE_MODE: {mode}. Must be 'live', 'record', or 'replay'")
|
|
||||||
|
|
||||||
if mode == InferenceMode.LIVE:
|
if mode == InferenceMode.LIVE:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if "LLAMA_STACK_TEST_RECORDING_DIR" not in os.environ:
|
storage_dir = os.environ.get("LLAMA_STACK_TEST_RECORDING_DIR", DEFAULT_STORAGE_DIR)
|
||||||
raise ValueError("LLAMA_STACK_TEST_RECORDING_DIR must be set for recording or replaying")
|
|
||||||
storage_dir = os.environ["LLAMA_STACK_TEST_RECORDING_DIR"]
|
|
||||||
|
|
||||||
return inference_recording(mode=mode, storage_dir=storage_dir)
|
return inference_recording(mode=mode, storage_dir=storage_dir)
|
||||||
|
|
||||||
|
|
||||||
|
@@ -134,8 +127,8 @@ class ResponseStorage:
    def store_recording(self, request_hash: str, request: dict[str, Any], response: dict[str, Any]):
        """Store a request/response pair."""
        # Generate unique response filename
        response_file = f"{request_hash[:12]}.json"
        response_path = self.responses_dir / response_file
        short_hash = request_hash[:12]
        response_file = f"{short_hash}.json"

        # Serialize response body if needed
        serialized_response = dict(response)
@@ -147,6 +140,14 @@ class ResponseStorage:
            # Handle single response
            serialized_response["body"] = _serialize_response(serialized_response["body"])

        # If this is an Ollama /api/tags recording, include models digest in filename to distinguish variants
        endpoint = request.get("endpoint")
        if endpoint in ("/api/tags", "/v1/models"):
            digest = _model_identifiers_digest(endpoint, response)
            response_file = f"models-{short_hash}-{digest}.json"

        response_path = self.responses_dir / response_file

        # Save response to JSON file
        with open(response_path, "w") as f:
            json.dump({"request": request, "response": serialized_response}, f, indent=2)
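A hedged sketch of driving these recording modes from a test process; the env var names and defaults come from the docstring above, while the test body itself is invented:

    import os

    # Both lines are optional now: "replay" is the default mode and the
    # recording dir defaults to tests/integration/recordings.
    os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "record"  # or "live" / "replay"
    os.environ["LLAMA_STACK_TEST_RECORDING_DIR"] = "tests/integration/recordings"

    ctx = setup_inference_recording()  # returns None in "live" mode
    if ctx is not None:
        with ctx:
            pass  # OpenAI/Ollama calls made here are recorded (or replayed)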
@@ -161,19 +162,85 @@ class ResponseStorage:
        if not response_path.exists():
            return None

        with open(response_path) as f:
            data = json.load(f)

        # Deserialize response body if needed
        if "response" in data and "body" in data["response"]:
            if isinstance(data["response"]["body"], list):
                # Handle streaming responses
                data["response"]["body"] = [_deserialize_response(chunk) for chunk in data["response"]["body"]]
            else:
                # Handle single response
                data["response"]["body"] = _deserialize_response(data["response"]["body"])

        return cast(dict[str, Any], data)
        return _recording_from_file(response_path)

    def _model_list_responses(self, short_hash: str) -> list[dict[str, Any]]:
        results: list[dict[str, Any]] = []
        for path in self.responses_dir.glob(f"models-{short_hash}-*.json"):
            data = _recording_from_file(path)
            results.append(data)
        return results


def _recording_from_file(response_path) -> dict[str, Any]:
    with open(response_path) as f:
        data = json.load(f)

    # Deserialize response body if needed
    if "response" in data and "body" in data["response"]:
        if isinstance(data["response"]["body"], list):
            # Handle streaming responses
            data["response"]["body"] = [_deserialize_response(chunk) for chunk in data["response"]["body"]]
        else:
            # Handle single response
            data["response"]["body"] = _deserialize_response(data["response"]["body"])

    return cast(dict[str, Any], data)


def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
    def _extract_model_identifiers():
        """Extract a stable set of identifiers for model-list endpoints.

        Supported endpoints:
        - '/api/tags' (Ollama): response body has 'models': [ { name/model/digest/id/... }, ... ]
        - '/v1/models' (OpenAI): response body has 'data': [ { id: ... }, ... ]
        Returns a list of unique identifiers or None if structure doesn't match.
        """
        body = response["body"]
        if endpoint == "/api/tags":
            items = body.get("models")
            idents = [m.model for m in items]
        else:
            items = body.get("data")
            idents = [m.id for m in items]
        return sorted(set(idents))

    identifiers = _extract_model_identifiers()
    return hashlib.sha1(("|".join(identifiers)).encode("utf-8")).hexdigest()[:8]


def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]]) -> dict[str, Any] | None:
    """Return a single, unioned recording for supported model-list endpoints."""
    seen: dict[str, dict[str, Any]] = {}
    for rec in records:
        body = rec["response"]["body"]
        if endpoint == "/api/tags":
            items = body.models
        elif endpoint == "/v1/models":
            items = body.data
        else:
            items = []

        for m in items:
            if endpoint == "/v1/models":
                key = m.id
            else:
                key = m.model
            seen[key] = m

    ordered = [seen[k] for k in sorted(seen.keys())]
    canonical = records[0]
    canonical_req = canonical.get("request", {})
    if isinstance(canonical_req, dict):
        canonical_req["endpoint"] = endpoint
    if endpoint == "/v1/models":
        body = {"data": ordered, "object": "list"}
    else:
        from ollama import ListResponse

        body = ListResponse(models=ordered)
    return {"request": canonical_req, "response": {"body": body, "is_streaming": False}}
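To make the digest scheme above concrete, a small sketch (the model IDs are invented; the hashing mirrors _model_identifiers_digest from this diff):

    import hashlib

    # The digest is a short, order-insensitive fingerprint of the model IDs.
    idents = sorted({"all-minilm:l6-v2", "llama3.2:3b"})
    digest = hashlib.sha1("|".join(idents).encode("utf-8")).hexdigest()[:8]
    # Recordings that saw the same model set share one "models-<hash>-<digest>.json"
    # file; different sets get distinct files, and replay unions them back
    # together via _combine_model_list_responses().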
async def _patched_inference_method(original_method, self, client_type, endpoint, *args, **kwargs):
@@ -195,8 +262,6 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
        raise ValueError(f"Unknown client type: {client_type}")

    url = base_url.rstrip("/") + endpoint

    # Normalize request for matching
    method = "POST"
    headers = {}
    body = kwargs
@@ -204,7 +269,12 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
    request_hash = normalize_request(method, url, headers, body)

    if _current_mode == InferenceMode.REPLAY:
        recording = _current_storage.find_recording(request_hash)
        # Special handling for model-list endpoints: return union of all responses
        if endpoint in ("/api/tags", "/v1/models"):
            records = _current_storage._model_list_responses(request_hash[:12])
            recording = _combine_model_list_responses(endpoint, records)
        else:
            recording = _current_storage.find_recording(request_hash)
        if recording:
            response_body = recording["response"]["body"]

@@ -274,12 +344,14 @@ def patch_inference_clients():
    from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
    from openai.resources.completions import AsyncCompletions
    from openai.resources.embeddings import AsyncEmbeddings
    from openai.resources.models import AsyncModels

    # Store original methods for both OpenAI and Ollama clients
    _original_methods = {
        "chat_completions_create": AsyncChatCompletions.create,
        "completions_create": AsyncCompletions.create,
        "embeddings_create": AsyncEmbeddings.create,
        "models_list": AsyncModels.list,
        "ollama_generate": OllamaAsyncClient.generate,
        "ollama_chat": OllamaAsyncClient.chat,
        "ollama_embed": OllamaAsyncClient.embed,
@@ -304,10 +376,16 @@ def patch_inference_clients():
            _original_methods["embeddings_create"], self, "openai", "/v1/embeddings", *args, **kwargs
        )

    async def patched_models_list(self, *args, **kwargs):
        return await _patched_inference_method(
            _original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs
        )

    # Apply OpenAI patches
    AsyncChatCompletions.create = patched_chat_completions_create
    AsyncCompletions.create = patched_completions_create
    AsyncEmbeddings.create = patched_embeddings_create
    AsyncModels.list = patched_models_list

    # Create patched methods for Ollama client
    async def patched_ollama_generate(self, *args, **kwargs):
@@ -361,11 +439,13 @@ def unpatch_inference_clients():
    from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
    from openai.resources.completions import AsyncCompletions
    from openai.resources.embeddings import AsyncEmbeddings
    from openai.resources.models import AsyncModels

    # Restore OpenAI client methods
    AsyncChatCompletions.create = _original_methods["chat_completions_create"]
    AsyncCompletions.create = _original_methods["completions_create"]
    AsyncEmbeddings.create = _original_methods["embeddings_create"]
    AsyncModels.list = _original_methods["models_list"]

    # Restore Ollama client methods if they were patched
    OllamaAsyncClient.generate = _original_methods["ollama_generate"]
@@ -379,16 +459,10 @@ def unpatch_inference_clients():


@contextmanager
def inference_recording(mode: str = "live", storage_dir: str | Path | None = None) -> Generator[None, None, None]:
def inference_recording(mode: str, storage_dir: str | Path | None = None) -> Generator[None, None, None]:
    """Context manager for inference recording/replaying."""
    global _current_mode, _current_storage

    # Set defaults
    if storage_dir is None:
        storage_dir_path = Path.home() / ".llama" / "recordings"
    else:
        storage_dir_path = Path(storage_dir)

    # Store previous state
    prev_mode = _current_mode
    prev_storage = _current_storage
@@ -397,7 +471,9 @@ def inference_recording(mode: str = "live", storage_dir: str | Path | None = Non
    _current_mode = mode

    if mode in ["record", "replay"]:
        _current_storage = ResponseStorage(storage_dir_path)
        if storage_dir is None:
            raise ValueError("storage_dir is required for record and replay modes")
        _current_storage = ResponseStorage(Path(storage_dir))
        patch_inference_clients()

    yield
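A direct-use sketch of the tightened signature above (the path is invented): mode no longer defaults to "live", and storage_dir is now mandatory for record/replay:

    from pathlib import Path

    # Raises ValueError if storage_dir is omitted in "record" or "replay" mode.
    with inference_recording(mode="replay", storage_dir=Path("tests/integration/recordings")):
        pass  # patched OpenAI/Ollama clients serve responses from the recordings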
305
llama_stack/ui/package-lock.json
generated
@@ -14,11 +14,11 @@
        "@radix-ui/react-select": "^2.2.5",
        "@radix-ui/react-separator": "^1.1.7",
        "@radix-ui/react-slot": "^1.2.3",
        "@radix-ui/react-tooltip": "^1.2.6",
        "@radix-ui/react-tooltip": "^1.2.8",
        "class-variance-authority": "^0.7.1",
        "clsx": "^2.1.1",
        "framer-motion": "^11.18.2",
        "framer-motion": "^12.23.12",
        "llama-stack-client": "^0.2.19",
        "llama-stack-client": "^0.2.20",
        "lucide-react": "^0.510.0",
        "next": "15.3.3",
        "next-auth": "^4.24.11",
@@ -39,16 +39,16 @@
        "@testing-library/jest-dom": "^6.8.0",
        "@testing-library/react": "^16.3.0",
        "@types/jest": "^29.5.14",
        "@types/node": "^20",
        "@types/node": "^24",
        "@types/react": "^19",
        "@types/react-dom": "^19",
        "eslint": "^9",
        "eslint-config-next": "15.3.2",
        "eslint-config-next": "15.5.2",
        "eslint-config-prettier": "^10.1.8",
        "eslint-plugin-prettier": "^5.5.4",
        "jest": "^29.7.0",
        "jest-environment-jsdom": "^29.7.0",
        "prettier": "3.5.3",
        "prettier": "3.6.2",
        "tailwindcss": "^4",
        "ts-node": "^10.9.2",
        "tw-animate-css": "^1.2.9",
@@ -1854,9 +1854,9 @@
      "integrity": "sha512-OdiMrzCl2Xi0VTjiQQUK0Xh7bJHnOuET2s+3V+Y40WJBAXrJeGA3f+I8MZJ/YQ3mVGi5XGR1L66oFlgqXhQ4Vw=="
    },
    "node_modules/@next/eslint-plugin-next": {
      "version": "15.3.2",
      "version": "15.5.2",
      "resolved": "https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-15.3.2.tgz",
      "resolved": "https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-15.5.2.tgz",
      "integrity": "sha512-ijVRTXBgnHT33aWnDtmlG+LJD+5vhc9AKTJPquGG5NKXjpKNjc62woIhFtrAcWdBobt8kqjCoaJ0q6sDQoX7aQ==",
      "integrity": "sha512-lkLrRVxcftuOsJNhWatf1P2hNVfh98k/omQHrCEPPriUypR6RcS13IvLdIrEvkm9AH2Nu2YpR5vLqBuy6twH3Q==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
@@ -2861,29 +2861,6 @@
        }
      }
    },
    "node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-visually-hidden": {
      "version": "1.2.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.3.tgz",
      "integrity": "sha512-pzJq12tEaaIhqjbzpCuv/OypJY/BPavOofm+dbab+MHLajy277+1lLm6JFcGgF5eskJ6mquGirhXY2GD/8u8Ug==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-primitive": "2.1.3"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-separator": {
      "version": "1.1.7",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-separator/-/react-separator-1.1.7.tgz",
@@ -2949,23 +2926,23 @@
        }
      }
    },
    "node_modules/@radix-ui/react-tooltip": {
      "version": "1.2.6",
      "version": "1.2.8",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.6.tgz",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.8.tgz",
      "integrity": "sha512-zYb+9dc9tkoN2JjBDIIPLQtk3gGyz8FMKoqYTb8EMVQ5a5hBcdHPECrsZVI4NpPAUOixhkoqg7Hj5ry5USowfA==",
      "integrity": "sha512-tY7sVt1yL9ozIxvmbtN5qtmH2krXcBCfjEiCgKGLqunJHvgvZG2Pcl2oQ3kbcZARb1BGEHdkLzcYGO8ynVlieg==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/primitive": "1.1.2",
        "@radix-ui/primitive": "1.1.3",
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-context": "1.1.2",
        "@radix-ui/react-dismissable-layer": "1.1.9",
        "@radix-ui/react-dismissable-layer": "1.1.11",
        "@radix-ui/react-id": "1.1.1",
        "@radix-ui/react-popper": "1.2.6",
        "@radix-ui/react-popper": "1.2.8",
        "@radix-ui/react-portal": "1.1.8",
        "@radix-ui/react-portal": "1.1.9",
        "@radix-ui/react-presence": "1.1.4",
        "@radix-ui/react-presence": "1.1.5",
        "@radix-ui/react-primitive": "2.1.2",
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-slot": "1.2.2",
        "@radix-ui/react-slot": "1.2.3",
        "@radix-ui/react-use-controllable-state": "1.2.2",
        "@radix-ui/react-visually-hidden": "1.2.2"
        "@radix-ui/react-visually-hidden": "1.2.3"
      },
      "peerDependencies": {
        "@types/react": "*",
@@ -2982,21 +2959,162 @@
        }
      }
    },
    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-slot": {
      "version": "1.2.2",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.2.tgz",
      "integrity": "sha512-y7TBO4xN4Y94FvcWIOIh18fM4R1A8S4q1jhoz4PNzOoHsFcN8pogcFmZrTYAm4F9VRUrWP/Mw7xSKybIeRI+CQ==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-compose-refs": "1.1.2"
      },
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/primitive": {
      "version": "1.1.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
      "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
      "license": "MIT"
    },
    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-arrow": {
      "version": "1.1.7",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
      "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-primitive": "2.1.3"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-dismissable-layer": {
      "version": "1.1.11",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz",
      "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/primitive": "1.1.3",
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-use-callback-ref": "1.1.1",
        "@radix-ui/react-use-escape-keydown": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-popper": {
      "version": "1.2.8",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz",
      "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==",
      "license": "MIT",
      "dependencies": {
        "@floating-ui/react-dom": "^2.0.0",
        "@radix-ui/react-arrow": "1.1.7",
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-context": "1.1.2",
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-use-callback-ref": "1.1.1",
        "@radix-ui/react-use-layout-effect": "1.1.1",
        "@radix-ui/react-use-rect": "1.1.1",
        "@radix-ui/react-use-size": "1.1.1",
        "@radix-ui/rect": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-portal": {
      "version": "1.1.9",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
      "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-use-layout-effect": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-presence": {
      "version": "1.1.5",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz",
      "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-use-layout-effect": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-primitive": {
      "version": "2.1.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-slot": "1.2.3"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
@@ -3137,12 +3255,35 @@
      }
    },
    "node_modules/@radix-ui/react-visually-hidden": {
      "version": "1.2.2",
      "version": "1.2.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.2.tgz",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.3.tgz",
      "integrity": "sha512-ORCmRUbNiZIv6uV5mhFrhsIKw4UX/N3syZtyqvry61tbGm4JlgQuSn0hk5TwCARsCjkcnuRkSdCE3xfb+ADHew==",
      "integrity": "sha512-pzJq12tEaaIhqjbzpCuv/OypJY/BPavOofm+dbab+MHLajy277+1lLm6JFcGgF5eskJ6mquGirhXY2GD/8u8Ug==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-primitive": "2.1.2"
        "@radix-ui/react-primitive": "2.1.3"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-visually-hidden/node_modules/@radix-ui/react-primitive": {
      "version": "2.1.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-slot": "1.2.3"
      },
      "peerDependencies": {
        "@types/react": "*",
@@ -3910,12 +4051,12 @@
      "license": "MIT"
    },
    "node_modules/@types/node": {
      "version": "20.17.47",
      "version": "24.3.0",
      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.17.47.tgz",
      "resolved": "https://registry.npmjs.org/@types/node/-/node-24.3.0.tgz",
      "integrity": "sha512-3dLX0Upo1v7RvUimvxLeXqwrfyKxUINk0EAM83swP2mlSUcwV73sZy8XhNz8bcZ3VbsfQyC/y6jRdL5tgCNpDQ==",
      "integrity": "sha512-aPTXCrfwnDLj4VvXrm+UUCQjNEvJgNA8s5F1cvwQU+3KNltTOkBm1j30uNLyqqPNe7gE3KFzImYoZEfLhp4Yow==",
      "license": "MIT",
      "dependencies": {
        "undici-types": "~6.19.2"
        "undici-types": "~7.10.0"
      }
    },
    "node_modules/@types/node-fetch": {
@@ -6433,13 +6574,13 @@
      }
    },
    "node_modules/eslint-config-next": {
      "version": "15.3.2",
      "version": "15.5.2",
      "resolved": "https://registry.npmjs.org/eslint-config-next/-/eslint-config-next-15.3.2.tgz",
      "resolved": "https://registry.npmjs.org/eslint-config-next/-/eslint-config-next-15.5.2.tgz",
      "integrity": "sha512-FerU4DYccO4FgeYFFglz0SnaKRe1ejXQrDb8kWUkTAg036YWi+jUsgg4sIGNCDhAsDITsZaL4MzBWKB6f4G1Dg==",
      "integrity": "sha512-3hPZghsLupMxxZ2ggjIIrat/bPniM2yRpsVPVM40rp8ZMzKWOJp2CGWn7+EzoV2ddkUr5fxNfHpF+wU1hGt/3g==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "@next/eslint-plugin-next": "15.3.2",
        "@next/eslint-plugin-next": "15.5.2",
        "@rushstack/eslint-patch": "^1.10.3",
        "@typescript-eslint/eslint-plugin": "^5.4.2 || ^6.0.0 || ^7.0.0 || ^8.0.0",
        "@typescript-eslint/parser": "^5.4.2 || ^6.0.0 || ^7.0.0 || ^8.0.0",
@@ -7268,13 +7409,13 @@
      }
    },
    "node_modules/framer-motion": {
      "version": "11.18.2",
      "version": "12.23.12",
      "resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-11.18.2.tgz",
      "resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-12.23.12.tgz",
      "integrity": "sha512-5F5Och7wrvtLVElIpclDT0CBzMVg3dL22B64aZwHtsIY8RB4mXICLrkajK4G9R+ieSAGcgrLeae2SeUTg2pr6w==",
      "integrity": "sha512-6e78rdVtnBvlEVgu6eFEAgG9v3wLnYEboM8I5O5EXvfKC8gxGQB8wXJdhkMy10iVcn05jl6CNw7/HTsTCfwcWg==",
      "license": "MIT",
      "dependencies": {
        "motion-dom": "^11.18.1",
        "motion-dom": "^12.23.12",
        "motion-utils": "^11.18.1",
        "motion-utils": "^12.23.6",
        "tslib": "^2.4.0"
      },
      "peerDependencies": {
@@ -10006,9 +10147,9 @@
      "license": "MIT"
    },
    "node_modules/llama-stack-client": {
      "version": "0.2.19",
      "version": "0.2.20",
      "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.19.tgz",
      "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.20.tgz",
      "integrity": "sha512-sDuAhUdEGlERZ3jlMUzPXcQTgMv/pGbDrPX0ifbE5S+gr7Q+7ohuQYrIXe+hXgIipFjq+y4b2c5laZ76tmAyEA==",
      "integrity": "sha512-1vD5nizTX5JEW8TADxKgy/P1W8YZoPSpdnmfxbdYbWgpQ3BWtbvLS6jmDk7VwVA5fRC4895VfHsRDfS1liHarw==",
      "license": "MIT",
      "dependencies": {
        "@types/node": "^18.11.18",
@@ -11184,18 +11325,18 @@
      }
    },
    "node_modules/motion-dom": {
      "version": "11.18.1",
      "version": "12.23.12",
      "resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-11.18.1.tgz",
      "resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-12.23.12.tgz",
      "integrity": "sha512-g76KvA001z+atjfxczdRtw/RXOM3OMSdd1f4DL77qCTF/+avrRJiawSG4yDibEQ215sr9kpinSlX2pCTJ9zbhw==",
      "integrity": "sha512-RcR4fvMCTESQBD/uKQe49D5RUeDOokkGRmz4ceaJKDBgHYtZtntC/s2vLvY38gqGaytinij/yi3hMcWVcEF5Kw==",
      "license": "MIT",
      "dependencies": {
        "motion-utils": "^11.18.1"
        "motion-utils": "^12.23.6"
      }
    },
    "node_modules/motion-utils": {
      "version": "11.18.1",
      "version": "12.23.6",
      "resolved": "https://registry.npmjs.org/motion-utils/-/motion-utils-11.18.1.tgz",
      "resolved": "https://registry.npmjs.org/motion-utils/-/motion-utils-12.23.6.tgz",
      "integrity": "sha512-49Kt+HKjtbJKLtgO/LKj9Ld+6vw9BjH5d9sc40R/kVyH8GLAXgT42M2NnuPcJNuA3s9ZfZBUcwIgpmZWGEE+hA==",
      "integrity": "sha512-eAWoPgr4eFEOFfg2WjIsMoqJTW6Z8MTUCgn/GZ3VRpClWBdnbjryiA3ZSNLyxCTmCQx4RmYX6jX1iWHbenUPNQ==",
      "license": "MIT"
    },
    "node_modules/ms": {
@@ -12083,9 +12224,9 @@
      }
    },
    "node_modules/prettier": {
      "version": "3.5.3",
      "version": "3.6.2",
      "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.5.3.tgz",
      "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.6.2.tgz",
      "integrity": "sha512-QQtaxnoDJeAkDvDKWCLiwIXkTgRhwYDEQCghU9Z6q03iyek/rxRh/2lC3HB7P8sWT2xC/y5JDctPLBIGzHKbhw==",
      "integrity": "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==",
      "dev": true,
      "license": "MIT",
      "bin": {
@@ -13986,9 +14127,9 @@
      }
    },
    "node_modules/undici-types": {
      "version": "6.19.8",
      "version": "7.10.0",
      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.10.0.tgz",
      "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
      "integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==",
      "license": "MIT"
    },
    "node_modules/unified": {
@@ -19,11 +19,11 @@
    "@radix-ui/react-select": "^2.2.5",
    "@radix-ui/react-separator": "^1.1.7",
    "@radix-ui/react-slot": "^1.2.3",
    "@radix-ui/react-tooltip": "^1.2.6",
    "@radix-ui/react-tooltip": "^1.2.8",
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
    "framer-motion": "^11.18.2",
    "framer-motion": "^12.23.12",
    "llama-stack-client": "^0.2.19",
    "llama-stack-client": "^0.2.20",
    "lucide-react": "^0.510.0",
    "next": "15.3.3",
    "next-auth": "^4.24.11",
@@ -44,16 +44,16 @@
    "@testing-library/jest-dom": "^6.8.0",
    "@testing-library/react": "^16.3.0",
    "@types/jest": "^29.5.14",
    "@types/node": "^20",
    "@types/node": "^24",
    "@types/react": "^19",
    "@types/react-dom": "^19",
    "eslint": "^9",
    "eslint-config-next": "15.3.2",
    "eslint-config-next": "15.5.2",
    "eslint-config-prettier": "^10.1.8",
    "eslint-plugin-prettier": "^5.5.4",
    "jest": "^29.7.0",
    "jest-environment-jsdom": "^29.7.0",
    "prettier": "3.5.3",
    "prettier": "3.6.2",
    "tailwindcss": "^4",
    "ts-node": "^10.9.2",
    "tw-animate-css": "^1.2.9",
@@ -7,7 +7,7 @@ required-version = ">=0.7.0"

[project]
name = "llama_stack"
version = "0.2.19"
version = "0.2.20"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "Llama Stack"
readme = "README.md"
@@ -31,9 +31,8 @@ dependencies = [
    "huggingface-hub>=0.34.0,<1.0",
    "jinja2>=3.1.6",
    "jsonschema",
    "llama-stack-client>=0.2.19",
    "llama-stack-client>=0.2.20",
    "llama-api-client>=0.1.2",
    "openai>=1.99.6,<1.100.0",
    "openai>=1.99.6",
    "prompt-toolkit",
    "python-dotenv",
    "python-jose[cryptography]",
@@ -56,7 +55,7 @@ dependencies = [
ui = [
    "streamlit",
    "pandas",
    "llama-stack-client>=0.2.19",
    "llama-stack-client>=0.2.20",
    "streamlit-option-menu",
]

@@ -84,6 +83,7 @@ unit = [
    "openai",
    "aiosqlite",
    "aiohttp",
    "psycopg2-binary>=2.9.0",
    "pypdf",
    "mcp",
    "chardet",
@@ -92,7 +92,7 @@ unit = [
    "sqlalchemy[asyncio]>=2.0.41",
    "blobfile",
    "faiss-cpu",
    "pymilvus>=2.5.12",
    "pymilvus>=2.6.1",
    "milvus-lite>=2.5.0",
    "litellm",
    "together",
@@ -105,12 +105,13 @@ unit = [
# separately. If you are using "uv" to execute your tests, you can use the "--group" flag to specify extra
# dependencies.
test = [
    "openai",
    "openai>=1.100.0", # for expires_after support
    "aiosqlite",
    "aiohttp",
    "torch>=2.6.0",
    "torchvision>=0.21.0",
    "chardet",
    "psycopg2-binary>=2.9.0",
    "pypdf",
    "mcp",
    "datasets",
@@ -119,7 +120,7 @@ test = [
    "sqlalchemy",
    "sqlalchemy[asyncio]>=2.0.41",
    "requests",
    "pymilvus>=2.5.12",
    "pymilvus>=2.6.1",
    "milvus-lite>=2.5.0",
    "weaviate-client>=4.16.4",
]
@@ -144,7 +145,7 @@ docs = [
]
codegen = ["rich", "pydantic", "jinja2>=3.1.6"]
benchmark = [
    "locust>=2.37.14",
    "locust>=2.39.1",
]

[project.urls]
@@ -15,7 +15,7 @@ set -euo pipefail
BRANCH=""
TEST_SUBDIRS=""
TEST_PROVIDER="ollama"
RUN_VISION_TESTS=false
TEST_SUITE="base"
TEST_PATTERN=""

# Help function
@@ -27,9 +27,9 @@ Trigger the integration test recording workflow remotely. This way you do not ne

OPTIONS:
    -b, --branch BRANCH            Branch to run the workflow on (defaults to current branch)
    -s, --test-subdirs DIRS        Comma-separated list of test subdirectories to run (REQUIRED)
    -p, --test-provider PROVIDER   Test provider to use: vllm or ollama (default: ollama)
    -v, --run-vision-tests         Include vision tests in the recording
    -t, --test-suite SUITE         Test suite to use: base, responses, vision, etc. (default: base)
    -s, --test-subdirs DIRS        Comma-separated list of test subdirectories to run (overrides suite)
    -k, --test-pattern PATTERN     Regex pattern to pass to pytest -k
    -h, --help                     Show this help message

@@ -38,7 +38,7 @@ EXAMPLES:
    $0 --test-subdirs "agents"

    # Record tests for specific branch with vision tests
    $0 -b my-feature-branch --test-subdirs "inference" --run-vision-tests
    $0 -b my-feature-branch --test-suite vision

    # Record multiple test subdirectories with specific provider
    $0 --test-subdirs "agents,inference" --test-provider vllm
@@ -71,9 +71,9 @@ while [[ $# -gt 0 ]]; do
            TEST_PROVIDER="$2"
            shift 2
            ;;
        -v|--run-vision-tests)
        -t|--test-suite)
            RUN_VISION_TESTS=true
            TEST_SUITE="$2"
            shift
            shift 2
            ;;
        -k|--test-pattern)
            TEST_PATTERN="$2"
@@ -92,11 +92,11 @@ while [[ $# -gt 0 ]]; do
done

# Validate required parameters
if [[ -z "$TEST_SUBDIRS" ]]; then
if [[ -z "$TEST_SUBDIRS" && -z "$TEST_SUITE" ]]; then
    echo "Error: --test-subdirs is required"
    echo "Error: --test-subdirs or --test-suite is required"
    echo "Please specify which test subdirectories to run, e.g.:"
    echo "Please specify which test subdirectories to run or test suite to use, e.g.:"
    echo "  $0 --test-subdirs \"agents,inference\""
    echo "  $0 --test-subdirs \"inference\" --run-vision-tests"
    echo "  $0 --test-suite vision"
    echo ""
    exit 1
fi
@@ -239,17 +239,19 @@ echo "Triggering integration test recording workflow..."
echo "Branch: $BRANCH"
echo "Test provider: $TEST_PROVIDER"
echo "Test subdirs: $TEST_SUBDIRS"
echo "Run vision tests: $RUN_VISION_TESTS"
echo "Test suite: $TEST_SUITE"
echo "Test pattern: ${TEST_PATTERN:-"(none)"}"
echo ""

# Prepare inputs for gh workflow run
INPUTS="-f test-subdirs='$TEST_SUBDIRS'"
if [[ -n "$TEST_SUBDIRS" ]]; then
    INPUTS="-f test-subdirs='$TEST_SUBDIRS'"
fi
if [[ -n "$TEST_PROVIDER" ]]; then
    INPUTS="$INPUTS -f test-provider='$TEST_PROVIDER'"
fi
if [[ "$RUN_VISION_TESTS" == "true" ]]; then
if [[ -n "$TEST_SUITE" ]]; then
    INPUTS="$INPUTS -f run-vision-tests=true"
    INPUTS="$INPUTS -f test-suite='$TEST_SUITE'"
fi
if [[ -n "$TEST_PATTERN" ]]; then
    INPUTS="$INPUTS -f test-pattern='$TEST_PATTERN'"
@ -16,7 +16,7 @@ STACK_CONFIG=""
|
||||||
PROVIDER=""
|
PROVIDER=""
|
||||||
TEST_SUBDIRS=""
|
TEST_SUBDIRS=""
|
||||||
TEST_PATTERN=""
|
TEST_PATTERN=""
|
||||||
RUN_VISION_TESTS="false"
|
TEST_SUITE="base"
|
||||||
INFERENCE_MODE="replay"
|
INFERENCE_MODE="replay"
|
||||||
EXTRA_PARAMS=""
|
EXTRA_PARAMS=""
|
||||||
|
|
||||||
|
@ -28,12 +28,16 @@ Usage: $0 [OPTIONS]
|
||||||
Options:
|
Options:
|
||||||
--stack-config STRING Stack configuration to use (required)
|
--stack-config STRING Stack configuration to use (required)
|
||||||
--provider STRING Provider to use (ollama, vllm, etc.) (required)
|
--provider STRING Provider to use (ollama, vllm, etc.) (required)
|
||||||
--test-subdirs STRING Comma-separated list of test subdirectories to run (default: 'inference')
|
--test-suite STRING Comma-separated list of test suites to run (default: 'base')
|
||||||
--run-vision-tests Run vision tests instead of regular tests
|
|
||||||
--inference-mode STRING Inference mode: record or replay (default: replay)
|
--inference-mode STRING Inference mode: record or replay (default: replay)
|
||||||
|
--test-subdirs STRING Comma-separated list of test subdirectories to run (overrides suite)
|
||||||
--test-pattern STRING Regex pattern to pass to pytest -k
|
--test-pattern STRING Regex pattern to pass to pytest -k
|
||||||
--help Show this help message
|
--help Show this help message
|
||||||
|
|
||||||
|
Suites are defined in tests/integration/suites.py. They are used to narrow the collection of tests and provide default model options.
|
||||||
|
|
||||||
|
You can also specify subdirectories (of tests/integration) to select tests from, which will override the suite.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
# Basic inference tests with ollama
|
# Basic inference tests with ollama
|
||||||
$0 --stack-config server:ci-tests --provider ollama
|
$0 --stack-config server:ci-tests --provider ollama
|
||||||
|
@ -42,7 +46,7 @@ Examples:
|
||||||
$0 --stack-config server:ci-tests --provider vllm --test-subdirs 'inference,agents'
|
$0 --stack-config server:ci-tests --provider vllm --test-subdirs 'inference,agents'
|
||||||
|
|
||||||
# Vision tests with ollama
|
# Vision tests with ollama
|
||||||
$0 --stack-config server:ci-tests --provider ollama --run-vision-tests
|
$0 --stack-config server:ci-tests --provider ollama --test-suite vision
|
||||||
|
|
||||||
# Record mode for updating test recordings
|
# Record mode for updating test recordings
|
||||||
$0 --stack-config server:ci-tests --provider ollama --inference-mode record
|
$0 --stack-config server:ci-tests --provider ollama --inference-mode record
|
||||||
|
@ -64,9 +68,9 @@ while [[ $# -gt 0 ]]; do
|
||||||
TEST_SUBDIRS="$2"
|
TEST_SUBDIRS="$2"
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
--run-vision-tests)
|
--test-suite)
|
||||||
RUN_VISION_TESTS="true"
|
TEST_SUITE="$2"
|
||||||
shift
|
shift 2
|
||||||
;;
|
;;
|
||||||
--inference-mode)
|
--inference-mode)
|
||||||
INFERENCE_MODE="$2"
|
INFERENCE_MODE="$2"
|
||||||
|
@ -92,22 +96,25 @@ done
|
||||||
# Validate required parameters
|
# Validate required parameters
|
||||||
if [[ -z "$STACK_CONFIG" ]]; then
|
if [[ -z "$STACK_CONFIG" ]]; then
|
||||||
echo "Error: --stack-config is required"
|
echo "Error: --stack-config is required"
|
||||||
usage
|
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ -z "$PROVIDER" ]]; then
|
if [[ -z "$PROVIDER" ]]; then
|
||||||
echo "Error: --provider is required"
|
echo "Error: --provider is required"
|
||||||
usage
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ -z "$TEST_SUITE" && -z "$TEST_SUBDIRS" ]]; then
|
||||||
|
echo "Error: --test-suite or --test-subdirs is required"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "=== Llama Stack Integration Test Runner ==="
|
echo "=== Llama Stack Integration Test Runner ==="
|
||||||
echo "Stack Config: $STACK_CONFIG"
|
echo "Stack Config: $STACK_CONFIG"
|
||||||
echo "Provider: $PROVIDER"
|
echo "Provider: $PROVIDER"
|
||||||
echo "Test Subdirs: $TEST_SUBDIRS"
|
|
||||||
echo "Vision Tests: $RUN_VISION_TESTS"
|
|
||||||
echo "Inference Mode: $INFERENCE_MODE"
|
echo "Inference Mode: $INFERENCE_MODE"
|
||||||
|
echo "Test Suite: $TEST_SUITE"
|
||||||
|
echo "Test Subdirs: $TEST_SUBDIRS"
|
||||||
echo "Test Pattern: $TEST_PATTERN"
|
echo "Test Pattern: $TEST_PATTERN"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
|
@ -140,13 +147,6 @@ THIS_DIR=$(dirname "$0")
|
||||||
ROOT_DIR="$THIS_DIR/.."
|
ROOT_DIR="$THIS_DIR/.."
|
||||||
cd $ROOT_DIR
|
cd $ROOT_DIR
|
||||||
|
|
||||||
# Set recording directory
|
|
||||||
if [[ "$RUN_VISION_TESTS" == "true" ]]; then
|
|
||||||
export LLAMA_STACK_TEST_RECORDING_DIR="tests/integration/recordings/vision"
|
|
||||||
else
|
|
||||||
export LLAMA_STACK_TEST_RECORDING_DIR="tests/integration/recordings"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# check if "llama" and "pytest" are available. this script does not use `uv run` given
|
# check if "llama" and "pytest" are available. this script does not use `uv run` given
|
||||||
# it can be used in a pre-release environment where we have not been able to tell
|
# it can be used in a pre-release environment where we have not been able to tell
|
||||||
# uv about pre-release dependencies properly (yet).
|
# uv about pre-release dependencies properly (yet).
|
||||||
|
@@ -201,84 +201,46 @@ if [[ -n "$TEST_PATTERN" ]]; then
     PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN"
 fi

-# Run vision tests if specified
-if [[ "$RUN_VISION_TESTS" == "true" ]]; then
-    echo "Running vision tests..."
-    set +e
-    pytest -s -v tests/integration/inference/test_vision_inference.py \
-        --stack-config="$STACK_CONFIG" \
-        -k "$PYTEST_PATTERN" \
-        --vision-model=ollama/llama3.2-vision:11b \
-        --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
-        --color=yes $EXTRA_PARAMS \
-        --capture=tee-sys
-    exit_code=$?
-    set -e
-
-    if [ $exit_code -eq 0 ]; then
-        echo "✅ Vision tests completed successfully"
-    elif [ $exit_code -eq 5 ]; then
-        echo "⚠️ No vision tests collected (pattern matched no tests)"
-    else
-        echo "❌ Vision tests failed"
-        exit 1
-    fi
-    exit 0
-fi
-
-# Run regular tests
-if [[ -z "$TEST_SUBDIRS" ]]; then
-    TEST_SUBDIRS=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
-        sed 's|tests/integration/||' |
-        grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
-        sort)
-fi
 echo "Test subdirs to run: $TEST_SUBDIRS"

-# Collect all test files for the specified test types
-TEST_FILES=""
-for test_subdir in $(echo "$TEST_SUBDIRS" | tr ',' '\n'); do
-    # Skip certain test types for vllm provider
-    if [[ "$PROVIDER" == "vllm" ]]; then
-        if [[ "$test_subdir" == "safety" ]] || [[ "$test_subdir" == "post_training" ]] || [[ "$test_subdir" == "tool_runtime" ]]; then
-            echo "Skipping $test_subdir for vllm provider"
-            continue
-        fi
-    fi
-
-    if [[ "$STACK_CONFIG" != *"server:"* ]] && [[ "$test_subdir" == "batches" ]]; then
-        echo "Skipping $test_subdir for library client until types are supported"
-        continue
-    fi
-
-    if [[ -d "tests/integration/$test_subdir" ]]; then
-        # Find all Python test files in this directory
-        test_files=$(find tests/integration/$test_subdir -name "test_*.py" -o -name "*_test.py")
-        if [[ -n "$test_files" ]]; then
-            TEST_FILES="$TEST_FILES $test_files"
-            echo "Added test files from $test_subdir: $(echo $test_files | wc -w) files"
-        fi
-    else
-        echo "Warning: Directory tests/integration/$test_subdir does not exist"
-    fi
-done
-
-if [[ -z "$TEST_FILES" ]]; then
-    echo "No test files found for the specified test types"
-    exit 1
-fi
-
-echo ""
-echo "=== Running all collected tests in a single pytest command ==="
-echo "Total test files: $(echo $TEST_FILES | wc -w)"
+if [[ -n "$TEST_SUBDIRS" ]]; then
+    # Collect all test files for the specified test types
+    TEST_FILES=""
+    for test_subdir in $(echo "$TEST_SUBDIRS" | tr ',' '\n'); do
+        if [[ -d "tests/integration/$test_subdir" ]]; then
+            # Find all Python test files in this directory
+            test_files=$(find tests/integration/$test_subdir -name "test_*.py" -o -name "*_test.py")
+            if [[ -n "$test_files" ]]; then
+                TEST_FILES="$TEST_FILES $test_files"
+                echo "Added test files from $test_subdir: $(echo $test_files | wc -w) files"
+            fi
+        else
+            echo "Warning: Directory tests/integration/$test_subdir does not exist"
+        fi
+    done
+
+    if [[ -z "$TEST_FILES" ]]; then
+        echo "No test files found for the specified test types"
+        exit 1
+    fi
+
+    echo ""
+    echo "=== Running all collected tests in a single pytest command ==="
+    echo "Total test files: $(echo $TEST_FILES | wc -w)"
+
+    PYTEST_TARGET="$TEST_FILES"
+    EXTRA_PARAMS="$EXTRA_PARAMS --text-model=$TEXT_MODEL --embedding-model=sentence-transformers/all-MiniLM-L6-v2"
+else
+    PYTEST_TARGET="tests/integration/"
+    EXTRA_PARAMS="$EXTRA_PARAMS --suite=$TEST_SUITE"
+fi

 set +e
-pytest -s -v $TEST_FILES \
+pytest -s -v $PYTEST_TARGET \
     --stack-config="$STACK_CONFIG" \
     -k "$PYTEST_PATTERN" \
-    --text-model="$TEXT_MODEL" \
-    --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
-    --color=yes $EXTRA_PARAMS \
+    $EXTRA_PARAMS \
+    --color=yes \
     --capture=tee-sys
 exit_code=$?
 set -e
@@ -298,5 +260,18 @@ echo "=== System Resources After Tests ==="
 free -h 2>/dev/null || echo "free command not available"
 df -h

+# stop server
+if [[ "$STACK_CONFIG" == *"server:"* ]]; then
+    echo "Stopping Llama Stack Server..."
+    pids=$(lsof -i :8321 | awk 'NR>1 {print $2}')
+    if [[ -n "$pids" ]]; then
+        echo "Killing Llama Stack Server processes: $pids"
+        kill -9 $pids
+    else
+        echo "No Llama Stack Server processes found ?!"
+    fi
+    echo "Llama Stack Server stopped"
+fi
+
 echo ""
 echo "=== Integration Tests Complete ==="
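Taken together, the script now has two entry paths: an explicit `--test-subdirs` list still builds a file list by hand, while an empty list hands pytest the whole `tests/integration/` tree plus `--suite=$TEST_SUITE`. A minimal sketch of the two invocations, assuming the script's flag names mirror the variables above (`--test-subdirs`, `--test-suite`, `--inference-mode`, `--stack-config`):

```bash
# Subdir path: the script collects test_*.py files itself
./scripts/integration-tests.sh --stack-config server:starter \
    --test-subdirs "agents,inference" --inference-mode replay

# Suite path: pytest receives tests/integration/ plus --suite=vision
./scripts/integration-tests.sh --stack-config server:starter \
    --test-suite vision --inference-mode replay
```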
@@ -38,26 +38,15 @@ For running integration tests, you must provide a few things:

 - a distribution name (e.g., `starter`) or a path to a `run.yaml` file
 - a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.
-
-- Whether you are using replay or live mode for inference. This is specified with the LLAMA_STACK_TEST_INFERENCE_MODE environment variable. The default mode currently is "live" -- that is certainly surprising, but we will fix this soon.
-
 - Any API keys you need to use should be set in the environment, or can be passed in with the --env option.

 You can run the integration tests in replay mode with:
 ```bash
 # Run all tests with existing recordings
-LLAMA_STACK_TEST_INFERENCE_MODE=replay \
-  LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
 uv run --group test \
   pytest -sv tests/integration/ --stack-config=starter
 ```

-If you don't specify LLAMA_STACK_TEST_INFERENCE_MODE, by default it will be in "live" mode -- that is, it will make real API calls.
-
-```bash
-# Test against live APIs
-FIREWORKS_API_KEY=your_key pytest -sv tests/integration/inference --stack-config=starter
-```
-
 ### Re-recording tests

 #### Local Re-recording (Manual Setup Required)

@@ -66,7 +55,6 @@ If you want to re-record tests locally, you can do so with:

 ```bash
 LLAMA_STACK_TEST_INFERENCE_MODE=record \
-  LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
 uv run --group test \
   pytest -sv tests/integration/ --stack-config=starter -k "<appropriate test name>"
 ```

@@ -89,7 +77,7 @@ You must be careful when re-recording. CI workflows assume a specific setup for
 ./scripts/github/schedule-record-workflow.sh --test-subdirs "agents,inference"

 # Record with vision tests enabled
-./scripts/github/schedule-record-workflow.sh --test-subdirs "inference" --run-vision-tests
+./scripts/github/schedule-record-workflow.sh --test-suite vision

 # Record with specific provider
 ./scripts/github/schedule-record-workflow.sh --test-subdirs "agents" --test-provider vllm
@@ -42,6 +42,27 @@ Model parameters can be influenced by the following options:
 Each of these are comma-separated lists and can be used to generate multiple parameter combinations. Note that tests will be skipped
 if no model is specified.

+### Suites (fast selection + sane defaults)
+
+- `--suite`: comma-separated list of named suites that both narrow which tests are collected and prefill common model options (unless you pass them explicitly).
+- Available suites:
+  - `responses`: collects tests under `tests/integration/responses`; this is a separate suite because it needs a strong tool-calling model.
+  - `vision`: collects only `tests/integration/inference/test_vision_inference.py`; defaults `--vision-model=ollama/llama3.2-vision:11b`, `--embedding-model=sentence-transformers/all-MiniLM-L6-v2`.
+- Explicit flags always win. For example, `--suite=responses --text-model=<X>` overrides the suite's text model.
+
+Examples:
+
+```bash
+# Fast responses run with defaults
+pytest -s -v tests/integration --stack-config=server:starter --suite=responses
+
+# Fast single-file vision run with defaults
+pytest -s -v tests/integration --stack-config=server:starter --suite=vision
+
+# Combine suites and override a default
+pytest -s -v tests/integration --stack-config=server:starter --suite=responses,vision --embedding-model=text-embedding-3-small
+```
+
 ## Examples

 ### Testing against a Server
@@ -98,29 +119,25 @@ sentence-transformers/all-MiniLM-L6-v2

 The testing system supports three modes controlled by environment variables:

-### LIVE Mode (Default)
-Tests make real API calls:
+### REPLAY Mode (Default)
+Uses cached responses instead of making API calls:
 ```bash
-LLAMA_STACK_TEST_INFERENCE_MODE=live pytest tests/integration/
+pytest tests/integration/
 ```

 ### RECORD Mode
 Captures API interactions for later replay:
 ```bash
 LLAMA_STACK_TEST_INFERENCE_MODE=record \
-  LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
   pytest tests/integration/inference/test_new_feature.py
 ```

-### REPLAY Mode
-Uses cached responses instead of making API calls:
+### LIVE Mode
+Tests make real API calls (responses are not recorded):
 ```bash
-LLAMA_STACK_TEST_INFERENCE_MODE=replay \
-  LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
-  pytest tests/integration/
+LLAMA_STACK_TEST_INFERENCE_MODE=live pytest tests/integration/
 ```

-Note that right now you must specify the recording directory. This is because different tests use different recording directories and we don't (yet) have a fool-proof way to map a test to a recording directory. We are working on this.
+By default, the recording directory is `tests/integration/recordings`. You can override this by setting the `LLAMA_STACK_TEST_RECORDING_DIR` environment variable.
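For example, to keep scratch recordings out of the checked-in directory while iterating (the path below is illustrative):

```bash
# Record into a throwaway directory instead of tests/integration/recordings
LLAMA_STACK_TEST_INFERENCE_MODE=record \
  LLAMA_STACK_TEST_RECORDING_DIR=/tmp/scratch-recordings \
  pytest tests/integration/inference/test_new_feature.py
```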

 ## Managing Recordings

@@ -146,7 +163,6 @@ See the [main testing guide](../README.md#remote-re-recording-recommended) for f
 ```bash
 # Re-record specific tests
 LLAMA_STACK_TEST_INFERENCE_MODE=record \
-  LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
   pytest -s -v --stack-config=server:starter tests/integration/inference/test_modified.py
 ```
@@ -268,3 +268,58 @@ class TestBatchesIntegration:

         deleted_error_file = openai_client.files.delete(final_batch.error_file_id)
         assert deleted_error_file.deleted, f"Error file {final_batch.error_file_id} was not deleted successfully"
+
+    def test_batch_e2e_completions(self, openai_client, batch_helper, text_model_id):
+        """Run an end-to-end batch with a single successful text completion request."""
+        request_body = {"model": text_model_id, "prompt": "Say completions", "max_tokens": 20}
+
+        batch_requests = [
+            {
+                "custom_id": "success-1",
+                "method": "POST",
+                "url": "/v1/completions",
+                "body": request_body,
+            }
+        ]
+
+        with batch_helper.create_file(batch_requests) as uploaded_file:
+            batch = openai_client.batches.create(
+                input_file_id=uploaded_file.id,
+                endpoint="/v1/completions",
+                completion_window="24h",
+                metadata={"test": "e2e_completions_success"},
+            )
+
+            final_batch = batch_helper.wait_for(
+                batch.id,
+                max_wait_time=3 * 60,
+                expected_statuses={"completed"},
+                timeout_action="skip",
+            )
+
+        assert final_batch.status == "completed"
+        assert final_batch.request_counts is not None
+        assert final_batch.request_counts.total == 1
+        assert final_batch.request_counts.completed == 1
+        assert final_batch.output_file_id is not None
+
+        output_content = openai_client.files.content(final_batch.output_file_id)
+        if isinstance(output_content, str):
+            output_text = output_content
+        else:
+            output_text = output_content.content.decode("utf-8")
+
+        output_lines = output_text.strip().split("\n")
+        assert len(output_lines) == 1
+
+        result = json.loads(output_lines[0])
+        assert result["custom_id"] == "success-1"
+        assert "response" in result
+        assert result["response"]["status_code"] == 200
+
+        deleted_output_file = openai_client.files.delete(final_batch.output_file_id)
+        assert deleted_output_file.deleted
+
+        if final_batch.error_file_id is not None:
+            deleted_error_file = openai_client.files.delete(final_batch.error_file_id)
+            assert deleted_error_file.deleted
@@ -6,15 +6,17 @@
 import inspect
 import itertools
 import os
-import platform
 import textwrap
 import time
+from pathlib import Path

 import pytest
 from dotenv import load_dotenv

 from llama_stack.log import get_logger

+from .suites import SUITE_DEFINITIONS
+
 logger = get_logger(__name__, category="tests")


@@ -30,6 +32,8 @@ def pytest_runtest_makereport(item, call):
 def pytest_sessionstart(session):
     # stop macOS from complaining about duplicate OpenMP libraries
     os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
+    if "LLAMA_STACK_TEST_INFERENCE_MODE" not in os.environ:
+        os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "replay"


 def pytest_runtest_teardown(item):

@@ -59,9 +63,22 @@ def pytest_configure(config):
             key, value = env_var.split("=", 1)
             os.environ[key] = value

-    if platform.system() == "Darwin":  # Darwin is the system name for macOS
-        os.environ["DISABLE_CODE_SANDBOX"] = "1"
-        logger.info("Setting DISABLE_CODE_SANDBOX=1 for macOS")
+    suites_raw = config.getoption("--suite")
+    suites: list[str] = []
+    if suites_raw:
+        suites = [p.strip() for p in str(suites_raw).split(",") if p.strip()]
+        unknown = [p for p in suites if p not in SUITE_DEFINITIONS]
+        if unknown:
+            raise pytest.UsageError(
+                f"Unknown suite(s): {', '.join(unknown)}. Available: {', '.join(sorted(SUITE_DEFINITIONS.keys()))}"
+            )
+    for suite in suites:
+        suite_def = SUITE_DEFINITIONS.get(suite, {})
+        defaults: dict = suite_def.get("defaults", {})
+        for dest, value in defaults.items():
+            current = getattr(config.option, dest, None)
+            if not current:
+                setattr(config.option, dest, value)


 def pytest_addoption(parser):

@@ -103,16 +120,21 @@ def pytest_addoption(parser):
         default=384,
         help="Output dimensionality of the embedding model to use for testing. Default: 384",
     )
-    parser.addoption(
-        "--record-responses",
-        action="store_true",
-        help="Record new API responses instead of using cached ones.",
-    )
     parser.addoption(
         "--report",
         help="Path where the test report should be written, e.g. --report=/path/to/report.md",
     )

+    available_suites = ", ".join(sorted(SUITE_DEFINITIONS.keys()))
+    suite_help = (
+        "Comma-separated integration test suites to narrow collection and prefill defaults. "
+        "Available: "
+        f"{available_suites}. "
+        "Explicit CLI flags (e.g., --text-model) override suite defaults. "
+        "Examples: --suite=responses or --suite=responses,vision."
+    )
+    parser.addoption("--suite", help=suite_help)


 MODEL_SHORT_IDS = {
     "meta-llama/Llama-3.2-3B-Instruct": "3B",

@@ -195,3 +217,40 @@ def pytest_generate_tests(metafunc):


 pytest_plugins = ["tests.integration.fixtures.common"]
+
+
+def pytest_ignore_collect(path: str, config: pytest.Config) -> bool:
+    """Skip collecting paths outside the selected suite roots for speed."""
+    suites_raw = config.getoption("--suite")
+    if not suites_raw:
+        return False
+
+    names = [p.strip() for p in str(suites_raw).split(",") if p.strip()]
+    roots: list[str] = []
+    for name in names:
+        suite_def = SUITE_DEFINITIONS.get(name)
+        if suite_def:
+            roots.extend(suite_def.get("roots", []))
+    if not roots:
+        return False
+
+    p = Path(str(path)).resolve()
+
+    # Only constrain within tests/integration to avoid ignoring unrelated tests
+    integration_root = (Path(str(config.rootpath)) / "tests" / "integration").resolve()
+    if not p.is_relative_to(integration_root):
+        return False
+
+    for r in roots:
+        rp = (Path(str(config.rootpath)) / r).resolve()
+        if rp.is_file():
+            # Allow the exact file and any ancestor directories so pytest can walk into it.
+            if p == rp:
+                return False
+            if p.is_dir() and rp.is_relative_to(p):
+                return False
+        else:
+            # Allow anything inside an allowed directory
+            if p.is_relative_to(rp):
+                return False
+    return True
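conftest.py only reads two keys from each suite entry: `roots` (used by `pytest_ignore_collect` to filter collection) and `defaults` (option prefills applied in `pytest_configure`). The new `tests/integration/suites.py` module itself is not shown in this diff; a plausible sketch, with values inferred from the README's `responses` and `vision` descriptions, might look like:

```python
# Hypothetical sketch of tests/integration/suites.py -- the real module may
# differ. Only the "roots" and "defaults" keys are consumed by conftest.py.
SUITE_DEFINITIONS: dict[str, dict] = {
    "responses": {
        # needs a strong tool-calling model, hence a dedicated suite
        "roots": ["tests/integration/responses"],
        "defaults": {},
    },
    "vision": {
        # single-file suite with vision/embedding model prefills
        "roots": ["tests/integration/inference/test_vision_inference.py"],
        "defaults": {
            # keys are the argparse "dest" names behind --vision-model and
            # --embedding-model, so setattr(config.option, dest, value) works
            "vision_model": "ollama/llama3.2-vision:11b",
            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
        },
    },
}
```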
@@ -8,6 +8,7 @@ from io import BytesIO
 from unittest.mock import patch

 import pytest
+import requests

 from llama_stack.core.datatypes import User

@@ -79,6 +80,88 @@ def test_openai_client_basic_operations(openai_client):
         pass  # ignore 404


+@pytest.mark.xfail(message="expires_after not available on all providers")
+def test_expires_after(openai_client):
+    """Test uploading a file with expires_after parameter."""
+    client = openai_client
+
+    uploaded_file = None
+    try:
+        with BytesIO(b"expires_after test") as file_buffer:
+            file_buffer.name = "expires_after.txt"
+            uploaded_file = client.files.create(
+                file=file_buffer,
+                purpose="assistants",
+                expires_after={"anchor": "created_at", "seconds": 4545},
+            )
+
+        assert uploaded_file.expires_at is not None
+        assert uploaded_file.expires_at == uploaded_file.created_at + 4545
+
+        listed = client.files.list()
+        ids = [f.id for f in listed.data]
+        assert uploaded_file.id in ids
+
+        retrieved = client.files.retrieve(uploaded_file.id)
+        assert retrieved.id == uploaded_file.id
+
+    finally:
+        if uploaded_file is not None:
+            try:
+                client.files.delete(uploaded_file.id)
+            except Exception:
+                pass
+
+
+@pytest.mark.xfail(message="expires_after not available on all providers")
+def test_expires_after_requests(openai_client):
+    """Upload a file using requests multipart/form-data and bracketed expires_after fields.
+
+    This ensures clients that send form fields like `expires_after[anchor]` and
+    `expires_after[seconds]` are handled by the server.
+    """
+    base_url = f"{openai_client.base_url}files"
+
+    uploaded_id = None
+    try:
+        files = {"file": ("expires_after_with_requests.txt", BytesIO(b"expires_after via requests"))}
+        data = {
+            "purpose": "assistants",
+            "expires_after[anchor]": "created_at",
+            "expires_after[seconds]": "4545",
+        }
+
+        session = requests.Session()
+        request = requests.Request("POST", base_url, files=files, data=data)
+        prepared = session.prepare_request(request)
+        resp = session.send(prepared, timeout=30)
+        resp.raise_for_status()
+        result = resp.json()
+
+        assert result.get("id", "").startswith("file-")
+        uploaded_id = result["id"]
+        assert result.get("created_at") is not None
+        assert result.get("expires_at") == result["created_at"] + 4545
+
+        list_resp = requests.get(base_url, timeout=30)
+        list_resp.raise_for_status()
+        listed = list_resp.json()
+        ids = [f["id"] for f in listed.get("data", [])]
+        assert uploaded_id in ids
+
+        retrieve_resp = requests.get(f"{base_url}/{uploaded_id}", timeout=30)
+        retrieve_resp.raise_for_status()
+        retrieved = retrieve_resp.json()
+        assert retrieved["id"] == uploaded_id
+
+    finally:
+        if uploaded_id:
+            try:
+                requests.delete(f"{base_url}/{uploaded_id}", timeout=30)
+            except Exception:
+                pass
+
+
 @pytest.mark.xfail(message="User isolation broken for current providers, must be fixed.")
 @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user")
 def test_files_authentication_isolation(mock_get_authenticated_user, llama_stack_client):
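The `test_expires_after_requests` variant above exists because multipart clients often encode nested fields with bracket syntax rather than a JSON object. A rough curl equivalent of what it sends (the test derives its URL from `openai_client.base_url`, so the base URL below is illustrative):

```bash
# Illustrative only: bracketed multipart fields for expires_after
curl -X POST "$BASE_URL/files" \
  -F "file=@expires_after_with_requests.txt" \
  -F "purpose=assistants" \
  -F "expires_after[anchor]=created_at" \
  -F "expires_after[seconds]=4545"
```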
@@ -5,6 +5,8 @@
 # the root directory of this source tree.

+import time
+
 import pytest

 from ..test_cases.test_case import TestCase

@@ -35,6 +37,10 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
         "remote::sambanova",
         "remote::tgi",
         "remote::vertexai",
+        # {"error":{"message":"Unknown request URL: GET /openai/v1/completions. Please check the URL for typos,
+        # or see the docs at https://console.groq.com/docs/","type":"invalid_request_error","code":"unknown_url"}}
+        "remote::groq",
+        "remote::gemini",  # https://generativelanguage.googleapis.com/v1beta/openai/completions -> 404
     ):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")

@@ -56,6 +62,21 @@ def skip_if_model_doesnt_support_suffix(client_with_models, model_id):
         pytest.skip(f"Provider {provider.provider_type} doesn't support suffix.")


+def skip_if_doesnt_support_n(client_with_models, model_id):
+    provider = provider_from_model(client_with_models, model_id)
+    if provider.provider_type in (
+        "remote::sambanova",
+        "remote::ollama",
+        # https://console.groq.com/docs/openai#currently-unsupported-openai-features
+        # -> Error code: 400 - {'error': {'message': "'n' : number must be at most 1", 'type': 'invalid_request_error'}}
+        "remote::groq",
+        # Error code: 400 - [{'error': {'code': 400, 'message': 'Only one candidate can be specified in the
+        # current model', 'status': 'INVALID_ARGUMENT'}}]
+        "remote::gemini",
+    ):
+        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")
+
+
 def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, model_id):
     provider = provider_from_model(client_with_models, model_id)
     if provider.provider_type in (

@@ -260,10 +281,7 @@ def test_openai_chat_completion_streaming(compat_client, client_with_models, tex
 )
 def test_openai_chat_completion_streaming_with_n(compat_client, client_with_models, text_model_id, test_case):
     skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id)
-
-    provider = provider_from_model(client_with_models, text_model_id)
-    if provider.provider_type == "remote::ollama":
-        pytest.skip(f"Model {text_model_id} hosted by {provider.provider_type} doesn't support n > 1.")
+    skip_if_doesnt_support_n(client_with_models, text_model_id)

     tc = TestCase(test_case)
     question = tc["question"]

@@ -323,8 +341,15 @@ def test_inference_store(compat_client, client_with_models, text_model_id, strea
     response_id = response.id
     content = response.choices[0].message.content

-    responses = client.chat.completions.list(limit=1000)
-    assert response_id in [r.id for r in responses.data]
+    tries = 0
+    while tries < 10:
+        responses = client.chat.completions.list(limit=1000)
+        if response_id in [r.id for r in responses.data]:
+            break
+        else:
+            tries += 1
+            time.sleep(0.1)
+    assert tries < 10, f"Response {response_id} not found after 1 second"

     retrieved_response = client.chat.completions.retrieve(response_id)
     assert retrieved_response.id == response_id

@@ -388,6 +413,18 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_mode
     response_id = response.id
     content = response.choices[0].message.content

+    # wait for the response to be stored
+    tries = 0
+    while tries < 10:
+        responses = client.chat.completions.list(limit=1000)
+        if response_id in [r.id for r in responses.data]:
+            break
+        else:
+            tries += 1
+            time.sleep(0.1)
+
+    assert tries < 10, f"Response {response_id} not found after 1 second"
+
     responses = client.chat.completions.list(limit=1000)
     assert response_id in [r.id for r in responses.data]
@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
@@ -20,15 +20,15 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama-guard3:1b",
-      "created_at": "2025-08-01T23:12:53.860911Z",
+      "created_at": "2025-09-03T17:37:35.23084Z",
       "done": true,
       "done_reason": "stop",
-      "total_duration": 249137667,
+      "total_duration": 195981375,
-      "load_duration": 152509542,
+      "load_duration": 110522917,
       "prompt_eval_count": 216,
-      "prompt_eval_duration": 71000000,
+      "prompt_eval_duration": 72393958,
       "eval_count": 2,
-      "eval_duration": 24000000,
+      "eval_duration": 11843000,
       "response": "safe",
       "thinking": null,
       "context": null

@@ -21,7 +21,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:18.033900164Z",
+      "created_at": "2025-09-03T17:41:43.950283Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -39,7 +39,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:18.213371151Z",
+      "created_at": "2025-09-03T17:41:43.991122Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -57,7 +57,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:18.387513976Z",
+      "created_at": "2025-09-03T17:41:44.031378Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -75,7 +75,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:18.564344287Z",
+      "created_at": "2025-09-03T17:41:44.073098Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -93,7 +93,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:18.746579415Z",
+      "created_at": "2025-09-03T17:41:44.115961Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -111,7 +111,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:18.923276047Z",
+      "created_at": "2025-09-03T17:41:44.156517Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -129,7 +129,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:19.099961963Z",
+      "created_at": "2025-09-03T17:41:44.197079Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -147,7 +147,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:19.275621884Z",
+      "created_at": "2025-09-03T17:41:44.237565Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -165,7 +165,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:19.452204196Z",
+      "created_at": "2025-09-03T17:41:44.277755Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -183,7 +183,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:19.626937514Z",
+      "created_at": "2025-09-03T17:41:44.318476Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -201,7 +201,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:19.805566767Z",
+      "created_at": "2025-09-03T17:41:44.358628Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -219,7 +219,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:19.985987477Z",
+      "created_at": "2025-09-03T17:41:44.398984Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -237,7 +237,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:20.166458601Z",
+      "created_at": "2025-09-03T17:41:44.439232Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -255,7 +255,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:20.343346795Z",
+      "created_at": "2025-09-03T17:41:44.479478Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -273,7 +273,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:20.525008091Z",
+      "created_at": "2025-09-03T17:41:44.520202Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -291,7 +291,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:20.709087695Z",
+      "created_at": "2025-09-03T17:41:44.560517Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -309,7 +309,7 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:20.887074305Z",
+      "created_at": "2025-09-03T17:41:44.601592Z",
      "done": false,
      "done_reason": null,
      "total_duration": null,
@@ -327,15 +327,15 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-31T17:59:21.065244925Z",
+      "created_at": "2025-09-03T17:41:44.642064Z",
       "done": true,
       "done_reason": "stop",
-      "total_duration": 4373531496,
+      "total_duration": 887142667,
-      "load_duration": 44438132,
+      "load_duration": 119331417,
       "prompt_eval_count": 56,
-      "prompt_eval_duration": 1296273199,
+      "prompt_eval_duration": 74294709,
       "eval_count": 18,
-      "eval_duration": 3032321735,
+      "eval_duration": 692842791,
       "response": "",
       "thinking": null,
       "context": null

@@ -20,15 +20,15 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama-guard3:1b",
-      "created_at": "2025-08-01T23:13:57.556416Z",
+      "created_at": "2025-09-03T17:37:47.461886Z",
       "done": true,
       "done_reason": "stop",
-      "total_duration": 432363250,
+      "total_duration": 338927833,
-      "load_duration": 159296417,
+      "load_duration": 100895125,
       "prompt_eval_count": 223,
-      "prompt_eval_duration": 257000000,
+      "prompt_eval_duration": 221583042,
       "eval_count": 2,
-      "eval_duration": 14000000,
+      "eval_duration": 12341416,
       "response": "safe",
       "thinking": null,
       "context": null
@@ -24,7 +24,7 @@
 {
   "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
   "__data__": {
-    "id": "chatcmpl-29",
+    "id": "chatcmpl-414",
     "choices": [
       {
         "delta": {
@@ -39,7 +39,7 @@
         "logprobs": null
       }
     ],
-    "created": 1754090031,
+    "created": 1756921333,
     "model": "llama3.2:3b-instruct-fp16",
     "object": "chat.completion.chunk",
     "service_tier": null,
@@ -50,7 +50,7 @@
 {
   "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
   "__data__": {
-    "id": "chatcmpl-29",
+    "id": "chatcmpl-414",
     "choices": [
       {
         "delta": {
@@ -65,7 +65,7 @@
         "logprobs": null
       }
     ],
-    "created": 1754090031,
+    "created": 1756921333,
     "model": "llama3.2:3b-instruct-fp16",
     "object": "chat.completion.chunk",
     "service_tier": null,
@@ -76,7 +76,7 @@
 {
   "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
   "__data__": {
-    "id": "chatcmpl-29",
+    "id": "chatcmpl-414",
     "choices": [
       {
         "delta": {
@@ -91,7 +91,7 @@
         "logprobs": null
       }
     ],
-    "created": 1754090031,
+    "created": 1756921333,
     "model": "llama3.2:3b-instruct-fp16",
     "object": "chat.completion.chunk",
     "service_tier": null,
@@ -102,7 +102,7 @@
 {
   "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
   "__data__": {
-    "id": "chatcmpl-29",
+    "id": "chatcmpl-414",
     "choices": [
       {
         "delta": {
@@ -117,7 +117,7 @@
         "logprobs": null
       }
     ],
-    "created": 1754090031,
+    "created": 1756921333,
     "model": "llama3.2:3b-instruct-fp16",
     "object": "chat.completion.chunk",
     "service_tier": null,
@@ -128,7 +128,7 @@
 {
   "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
  "__data__": {
-    "id": "chatcmpl-29",
+    "id": "chatcmpl-414",
     "choices": [
       {
         "delta": {
@@ -143,7 +143,7 @@
         "logprobs": null
       }
     ],
-    "created": 1754090031,
+    "created": 1756921334,
     "model": "llama3.2:3b-instruct-fp16",
     "object": "chat.completion.chunk",
     "service_tier": null,
@@ -154,7 +154,7 @@
 {
   "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
   "__data__": {
-    "id": "chatcmpl-29",
+    "id": "chatcmpl-414",
     "choices": [
       {
         "delta": {
@@ -169,7 +169,7 @@
         "logprobs": null
       }
     ],
-    "created": 1754090031,
+    "created": 1756921334,
     "model": "llama3.2:3b-instruct-fp16",
     "object": "chat.completion.chunk",
     "service_tier": null,
@@ -180,7 +180,7 @@
 {
   "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
   "__data__": {
-    "id": "chatcmpl-29",
+    "id": "chatcmpl-414",
     "choices": [
       {
         "delta": {
@@ -195,7 +195,7 @@
         "logprobs": null
       }
     ],
-    "created": 1754090031,
+    "created": 1756921334,
     "model": "llama3.2:3b-instruct-fp16",
     "object": "chat.completion.chunk",
     "service_tier": null,
@@ -206,7 +206,7 @@
 {
   "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
   "__data__": {
-    "id": "chatcmpl-29",
+    "id": "chatcmpl-414",
     "choices": [
       {
         "delta": {
@@ -221,7 +221,7 @@
         "logprobs": null
       }
     ],
-    "created": 1754090031,
+    "created": 1756921334,
     "model": "llama3.2:3b-instruct-fp16",
     "object": "chat.completion.chunk",
     "service_tier": null,
@@ -1,7 +1,7 @@
 {
   "request": {
     "method": "POST",
-    "url": "http://localhost:11434/v1/v1/chat/completions",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "llama3.2:3b-instruct-fp16",
@@ -20,14 +20,14 @@
     "body": {
       "__type__": "openai.types.chat.chat_completion.ChatCompletion",
       "__data__": {
-        "id": "chatcmpl-368",
+        "id": "chatcmpl-161",
         "choices": [
           {
             "finish_reason": "stop",
             "index": 0,
             "logprobs": null,
             "message": {
-              "content": "Saturn is known for its extensive ring system.",
+              "content": "The answer is Saturn.",
               "refusal": null,
               "role": "assistant",
               "annotations": null,
@@ -37,15 +37,15 @@
           }
         }
       ],
-      "created": 1754081853,
+      "created": 1756921364,
       "model": "llama3.2:3b-instruct-fp16",
       "object": "chat.completion",
       "service_tier": null,
       "system_fingerprint": "fp_ollama",
       "usage": {
-        "completion_tokens": 11,
+        "completion_tokens": 6,
         "prompt_tokens": 39,
-        "total_tokens": 50,
+        "total_tokens": 45,
         "completion_tokens_details": null,
         "prompt_tokens_details": null
       }

@@ -20,15 +20,15 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama-guard3:1b",
-      "created_at": "2025-08-01T23:12:51.682357Z",
+      "created_at": "2025-09-03T17:37:33.473237Z",
       "done": true,
       "done_reason": "stop",
-      "total_duration": 238161000,
+      "total_duration": 279025042,
-      "load_duration": 72494750,
+      "load_duration": 162673250,
       "prompt_eval_count": 212,
-      "prompt_eval_duration": 87000000,
+      "prompt_eval_duration": 73595834,
       "eval_count": 5,
-      "eval_duration": 74000000,
+      "eval_duration": 41950291,
       "response": "unsafe\nS8",
       "thinking": null,
       "context": null
@@ -1,7 +1,7 @@
 {
   "request": {
     "method": "POST",
-    "url": "http://localhost:11434/v1/v1/chat/completions",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "llama3.2:3b-instruct-fp16",
@@ -20,14 +20,14 @@
     "body": {
       "__type__": "openai.types.chat.chat_completion.ChatCompletion",
       "__data__": {
-        "id": "chatcmpl-876",
+        "id": "chatcmpl-974",
         "choices": [
           {
             "finish_reason": "stop",
             "index": 0,
             "logprobs": null,
             "message": {
-              "content": "I'm afraid I don't have a built-in ability to directly interface with or \"test\" OpenAI models, including the original GPT-1 model. However, I can explain how you might approach this task:\n\nThe OpenAI GPT-1 is a large transformer-based language model that was trained on a massive dataset of text and achieved state-of-the-art results in various natural language processing tasks.\n\nTo test or evaluate the performance of a model like GPT-1, you would typically follow these steps:\n\n1. **Get access to the OpenAI API**: The OpenAI API provides a way for developers to interact with the GPT-1 model programmatically. You can sign up for an API key on the OpenAI website.\n2. **Choose a testing platform or environment**: You'll need a compute platform that supports the necessary algorithms and data structures to run inference on the GPT-1 model. Some popular options include AWS, Google Cloud, or Azure Compute Virtual Machines.\n3. **Prepare your test input data**: This will involve creating text inputs in the format expected by the OpenAI API (i.e., a JSON object containing the text to be processed).\n4. **Use the OpenAI Python library or SDK**: The OpenAI Python library provides an easy-to-use interface for interacting with the GPT-1 model through the API.\n\nHere's some example code that demonstrates how you might use the OpenAI Flask API to test a single input:\n\n```python\nfrom flask import Flask, request, jsonify\nimport json\n\napp = Flask(__name__)\n\n@ app . route ( '/ /gpt-en ', ' Text ', methods = ['POST'])\ndef gpt_en () -> Json :\n    data = request . get_json ()\n    if not data or \"message\" in ( data ):\n        return None , 400 , { ' error' : \"Input must be a text string.\" }\n    response = []\n    while True:\n        message = \"\"\n        for token in data [\"input\"]:\n            response_text = f\"{data['prompt']} {token}\"\n            data[\"input\"] = [response_text]\n            new_response = gpt_en()(data)\n            if all([not item or not isinstance(item, dict) for item in new_response]):\n                break\n\n        message = json . dumps ({}\"text\": response_text})\n        response.append(message)\n\n    return jsonify ({\"output\": response}), 200 , {}\n\nif __name__ == \"__main__\":\n    app.run(debug=True)\n```\n\n5. **Evaluate the output**: Once you have processed your test input data using the GPT-1 model, you can evaluate the accuracy of the generated responses.\n\nKeep in mind that this is just a basic example to illustrate how you might approach testing the OpenAI GPT-1 model.",
+              "content": "I'm happy to help you test the OpenAI API, however I can not access the API.\n\nInstead why don't we follow these steps:\n\n* Check documentation\n* Contact support\n* Reach out to their community forum. \n\nLet me know if I can be of any additional assistance",
               "refusal": null,
               "role": "assistant",
               "annotations": null,
@@ -37,15 +37,15 @@
           }
         }
       ],
-      "created": 1754510050,
+      "created": 1756921202,
       "model": "llama3.2:3b-instruct-fp16",
       "object": "chat.completion",
       "service_tier": null,
       "system_fingerprint": "fp_ollama",
       "usage": {
-        "completion_tokens": 567,
+        "completion_tokens": 61,
         "prompt_tokens": 31,
-        "total_tokens": 598,
+        "total_tokens": 92,
         "completion_tokens_details": null,
         "prompt_tokens_details": null
       }

@@ -20,15 +20,15 @@
     "__type__": "ollama._types.GenerateResponse",
     "__data__": {
       "model": "llama-guard3:1b",
-      "created_at": "2025-08-01T23:12:52.919624Z",
+      "created_at": "2025-09-03T17:37:34.308033Z",
       "done": true,
       "done_reason": "stop",
-      "total_duration": 201956834,
+      "total_duration": 200296000,
-      "load_duration": 105132584,
+      "load_duration": 115974708,
       "prompt_eval_count": 212,
-      "prompt_eval_duration": 75000000,
+      "prompt_eval_duration": 72173459,
       "eval_count": 2,
-      "eval_duration": 20000000,
+      "eval_duration": 11536750,
       "response": "safe",
       "thinking": null,
       "context": null
|

@@ -40,7 +40,7 @@
 {
 "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 "__data__": {
-"id": "chatcmpl-457",
+"id": "chatcmpl-921",
 "choices": [
 {
 "delta": {
@@ -55,7 +55,7 @@
 "logprobs": null
 }
 ],
-"created": 1754090032,
+"created": 1756920971,
 "model": "llama3.2:3b-instruct-fp16",
 "object": "chat.completion.chunk",
 "service_tier": null,
@@ -66,7 +66,7 @@
 {
 "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 "__data__": {
-"id": "chatcmpl-457",
+"id": "chatcmpl-921",
 "choices": [
 {
 "delta": {
@@ -81,7 +81,7 @@
 "logprobs": null
 }
 ],
-"created": 1754090032,
+"created": 1756920971,
 "model": "llama3.2:3b-instruct-fp16",
 "object": "chat.completion.chunk",
 "service_tier": null,
@@ -92,7 +92,7 @@
 {
 "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 "__data__": {
-"id": "chatcmpl-457",
+"id": "chatcmpl-921",
 "choices": [
 {
 "delta": {
@@ -107,7 +107,7 @@
 "logprobs": null
 }
 ],
-"created": 1754090032,
+"created": 1756920971,
 "model": "llama3.2:3b-instruct-fp16",
 "object": "chat.completion.chunk",
 "service_tier": null,
@@ -118,7 +118,7 @@
 {
 "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 "__data__": {
-"id": "chatcmpl-457",
+"id": "chatcmpl-921",
 "choices": [
 {
 "delta": {
@@ -133,7 +133,7 @@
 "logprobs": null
 }
 ],
-"created": 1754090032,
+"created": 1756920971,
 "model": "llama3.2:3b-instruct-fp16",
 "object": "chat.completion.chunk",
 "service_tier": null,
@@ -144,7 +144,7 @@
 {
 "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 "__data__": {
-"id": "chatcmpl-457",
+"id": "chatcmpl-921",
 "choices": [
 {
 "delta": {
@@ -159,7 +159,7 @@
 "logprobs": null
 }
 ],
-"created": 1754090032,
+"created": 1756920971,
 "model": "llama3.2:3b-instruct-fp16",
 "object": "chat.completion.chunk",
 "service_tier": null,
@@ -170,7 +170,7 @@
 {
 "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 "__data__": {
-"id": "chatcmpl-457",
+"id": "chatcmpl-921",
 "choices": [
 {
 "delta": {
@@ -185,7 +185,7 @@
 "logprobs": null
 }
 ],
-"created": 1754090032,
+"created": 1756920971,
 "model": "llama3.2:3b-instruct-fp16",
 "object": "chat.completion.chunk",
 "service_tier": null,
@@ -196,7 +196,7 @@
 {
 "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 "__data__": {
-"id": "chatcmpl-457",
+"id": "chatcmpl-921",
 "choices": [
 {
 "delta": {
@@ -211,7 +211,7 @@
 "logprobs": null
 }
 ],
-"created": 1754090032,
+"created": 1756920971,
 "model": "llama3.2:3b-instruct-fp16",
 "object": "chat.completion.chunk",
 "service_tier": null,
@@ -222,7 +222,7 @@
 {
 "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 "__data__": {
-"id": "chatcmpl-457",
+"id": "chatcmpl-921",
 "choices": [
 {
 "delta": {
@@ -237,7 +237,7 @@
 "logprobs": null
 }
 ],
-"created": 1754090032,
+"created": 1756920971,
 "model": "llama3.2:3b-instruct-fp16",
 "object": "chat.completion.chunk",
 "service_tier": null,

@@ -20,15 +20,15 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama-guard3:1b",
-"created_at": "2025-08-01T23:12:53.580806Z",
+"created_at": "2025-09-03T17:37:34.994704Z",
 "done": true,
 "done_reason": "stop",
-"total_duration": 205732750,
-"load_duration": 98967000,
+"total_duration": 339570875,
+"load_duration": 262794125,
 "prompt_eval_count": 213,
-"prompt_eval_duration": 86000000,
+"prompt_eval_duration": 64061000,
 "eval_count": 2,
-"eval_duration": 18000000,
+"eval_duration": 11839042,
 "response": "safe",
 "thinking": null,
 "context": null

@@ -20,15 +20,15 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama-guard3:1b",
-"created_at": "2025-08-01T23:12:52.354566Z",
+"created_at": "2025-09-03T17:37:33.769233Z",
 "done": true,
 "done_reason": "stop",
-"total_duration": 605192500,
-"load_duration": 457087166,
+"total_duration": 253836584,
+"load_duration": 138624959,
 "prompt_eval_count": 210,
-"prompt_eval_duration": 63000000,
+"prompt_eval_duration": 69496125,
 "eval_count": 5,
-"eval_duration": 84000000,
+"eval_duration": 45062833,
 "response": "unsafe\nS12",
 "thinking": null,
 "context": null

@@ -20,15 +20,15 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama-guard3:1b",
-"created_at": "2025-08-01T23:12:52.686478Z",
+"created_at": "2025-09-03T17:37:34.074233Z",
 "done": true,
 "done_reason": "stop",
-"total_duration": 304136208,
-"load_duration": 155977000,
+"total_duration": 270746375,
+"load_duration": 156423042,
 "prompt_eval_count": 213,
-"prompt_eval_duration": 71000000,
+"prompt_eval_duration": 70338083,
 "eval_count": 5,
-"eval_duration": 76000000,
+"eval_duration": 43379167,
 "response": "unsafe\nS2",
 "thinking": null,
 "context": null

@@ -20,15 +20,15 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama-guard3:1b",
-"created_at": "2025-08-01T23:12:51.186501Z",
+"created_at": "2025-09-03T17:37:32.84197Z",
 "done": true,
 "done_reason": "stop",
-"total_duration": 3146184459,
-"load_duration": 2533467917,
+"total_duration": 21572898667,
+"load_duration": 21155275042,
 "prompt_eval_count": 212,
-"prompt_eval_duration": 526000000,
+"prompt_eval_duration": 371898125,
 "eval_count": 5,
-"eval_duration": 83000000,
+"eval_duration": 43290458,
 "response": "unsafe\nS1",
 "thinking": null,
 "context": null

@@ -20,15 +20,15 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama-guard3:1b",
-"created_at": "2025-08-01T23:12:53.332041Z",
+"created_at": "2025-09-03T17:37:34.607413Z",
 "done": true,
 "done_reason": "stop",
-"total_duration": 365895333,
-"load_duration": 257825208,
+"total_duration": 267812042,
+"load_duration": 181570000,
 "prompt_eval_count": 213,
-"prompt_eval_duration": 78000000,
+"prompt_eval_duration": 73947375,
 "eval_count": 2,
-"eval_duration": 28000000,
+"eval_duration": 11708000,
 "response": "safe",
 "thinking": null,
 "context": null

@@ -22,15 +22,15 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-04T22:55:05.685988Z",
+"created_at": "2025-09-03T17:36:13.821929Z",
 "done": true,
 "done_reason": "stop",
-"total_duration": 14128980625,
-"load_duration": 7220159208,
+"total_duration": 1907912167,
+"load_duration": 90979292,
 "prompt_eval_count": 18,
-"prompt_eval_duration": 4658000000,
+"prompt_eval_duration": 77350291,
 "eval_count": 43,
-"eval_duration": 2224000000,
+"eval_duration": 1738568334,
 "response": " _______.\n\nThe best answer is blue. The traditional nursery rhyme goes like this:\n\nRoses are red,\nViolets are blue,\nSugar is sweet,\nAnd so are you! (Or something similar.)",
 "thinking": null,
 "context": null

@@ -20,15 +20,15 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-07-31T17:50:06.140190726Z",
+"created_at": "2025-09-03T17:39:38.236797Z",
 "done": true,
 "done_reason": "stop",
-"total_duration": 5213341378,
-"load_duration": 43943569,
+"total_duration": 1296281500,
+"load_duration": 283393917,
 "prompt_eval_count": 23,
-"prompt_eval_duration": 1049424427,
+"prompt_eval_duration": 75453042,
 "eval_count": 24,
-"eval_duration": 4119422888,
+"eval_duration": 936860125,
 "response": "Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004.",
 "thinking": null,
 "context": null

tests/integration/recordings/responses/1e11c2b20ff8.json (new file, +422 lines)
@@ -0,0 +1,422 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/embeddings",
+    "headers": {},
+    "body": {
+      "model": "all-minilm:l6-v2",
+      "input": [
+        "How do systems learn automatically?"
+      ],
+      "encoding_format": "float"
+    },
+    "endpoint": "/v1/embeddings",
+    "model": "all-minilm:l6-v2"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
+      "__data__": {
+        "data": [
+          {
+            "embedding": [
+              0.042460807, -0.06189971, -0.0784711, 0.0064329687, 0.03129365, 0.00807445,
+              0.05801836, 0.025447326, 0.016402787, 0.045995634, -0.028924342, 0.04451832,
+              0.05686613, -0.015340794, -0.07020505, -0.057178136, -0.07683263, 0.006748679,
+              0.0043323045, -0.123651944, 0.0031534543, -0.03258051, -0.02936216, 0.024140852,
+              -0.028559243, 0.10224467, 0.0021632623, -0.006975691, 0.025292527, -0.055500276,
+              0.031231727, -0.0070274337, 0.08430815, -0.028431177, -0.083029, 0.009555893,
+              -0.020029299, -0.00243229, -0.00768719, -0.023077851, -0.09293533, -0.042625993,
+              -0.020000124, 0.008240663, 0.060970567, 0.050315727, -0.0510085, -0.008543903,
+              -0.030227834, -0.03582846, -0.17836656, -0.047279052, 0.033892106, 0.031623542,
+              -0.008832113, 0.10480918, 0.033559043, 0.090348184, -0.015757555, -0.0125672715,
+              -0.084686965, -0.114781834, -0.13755985, 0.021652374, 0.047834594, 0.043243896,
+              0.008659893, 0.038724966, 0.046716973, -0.077413626, -0.04887495, 0.031287406,
+              0.022356613, 0.00043283988, 0.052321073, -0.012254071, -0.035172574, -0.00825216,
+              -0.008866574, -0.034267236, -0.04576201, 0.002467568, -0.040877618, 0.08047682,
+              0.09472728, 0.0413438, 0.0057974122, 0.044982508, 0.025369909, 0.006618073,
+              0.010467276, -0.07960384, -0.03108485, -0.03528749, 0.01831391, 0.053473305,
+              0.06568304, -0.07259002, 0.02523736, 0.10520362, 0.035732146, 0.028157586,
+              0.011687256, 0.044207197, 0.012604437, 0.0018819098, 0.03926183, 0.043135095,
+              0.09784739, -0.08801336, -0.06060836, 0.02681984, 0.0041358666, 0.033492945,
+              0.011799116, 0.009551661, -0.0095491735, -0.021212189, -0.008917248, 0.029352615,
+              -0.012693442, -0.019269384, 0.009901157, -0.00812101, 0.018603146, -0.0007501193,
+              -0.056115113, -3.8018077e-33, 0.020848714, 0.0047160466, 0.019726405, 0.06024251,
+              -0.0685974, -0.07497267, 0.007997452, -0.047339544, 0.057801835, 0.049544968,
+              0.01878086, 0.03274472, 0.017663997, 0.07483022, 0.02496901, -0.011843339,
+              -0.11212756, 0.0070379525, 0.028099466, -0.01746246, 0.08173482, -0.007920462,
+              0.032095373, -0.12300146, 0.033773854, 0.025873141, -0.0045020077, 0.079493225,
+              0.0040725255, 0.03305898, 0.008061117, 0.0134422695, -0.03292251, 0.031554114,
+              0.04013794, 0.0014983519, 0.030762345, 0.029481992, 0.041350223, -0.047438618,
+              0.03944708, -0.07526981, 0.037927423, -0.026016014, 0.016933467, 0.0136799775,
+              0.0071263947, -0.05386736, -0.07443268, -0.006070775, 0.024427462, -0.039844982,
+              -0.020661902, -0.033354662, 0.009005565, 0.12111172, -0.028260944, -0.036192853,
+              -0.021332363, 0.05333571, 0.05161245, -0.01204843, 0.035563566, 0.05408247,
+              0.060722187, 0.07159865, 0.04299143, 0.008544481, 0.07421879, 0.00841512,
+              -0.036342908, -0.008549791, -0.08816386, -0.049075164, 0.00029373015, -0.05127952,
+              0.03586739, -0.030380003, -0.012642127, 0.018771531, 0.01711824, -0.06644723,
+              0.023793438, 0.0010271219, -0.01939443, -0.053452212, -0.017060323, -0.062207118,
+              -0.05962535, -0.012172617, -0.013190802, -0.037036054, 0.00082622556, 0.098088354,
+              0.024690514, 2.1767905e-33, -0.010088812, -0.016811697, -0.042140447, 0.08837209,
+              -0.028899776, -0.0048947735, -0.082139015, 0.029238816, -0.043079354, -0.014153092,
+              -0.028387645, 0.025998218, -0.017625, 0.046511114, -0.005768211, 0.030010609,
+              0.011375536, 0.017426634, 0.055062976, 0.032230247, -0.07995765, 0.032486655,
+              -0.060016844, -0.011561194, 0.010211269, 0.046528235, 0.001191399, 0.0786961,
+              -0.0446158, 0.032789085, 0.0023115936, -0.03886269, -0.017663589, 0.07913024,
+              -0.004583343, 0.043521065, -0.031589273, 0.008867868, -0.05013296, 0.068929516,
+              0.043675046, 0.019968731, -0.08471742, -0.046864275, -0.0068198936, -0.026138468,
+              -0.05107216, 0.054374695, 0.03069186, -0.010925094, 0.04721093, -0.017387696,
+              -0.020754937, -0.081763394, -0.027709637, 0.035980806, 0.05396534, 0.044874854,
+              0.059699643, 0.041227758, -0.06664364, -0.09201654, 0.008915574, 0.025849758,
+              -0.038651932, -0.0044070315, -0.052066546, 0.027435115, 0.012089562, 0.048306923,
+              0.059854515, 0.097325735, -0.053612895, -0.07639326, 0.015773866, -0.0444848,
+              -0.13214406, -0.0702488, -0.10134438, -0.11905995, -0.027714504, 0.006891868,
+              -0.0053650527, 0.054135524, -0.111159205, 0.07835098, 0.03506018, 0.016036613,
+              0.021490784, -0.061526407, 0.007425222, 0.04833579, -0.01361202, 0.012450488,
+              -0.12729599, -1.4009424e-08, -0.040908325, -0.01596458, 0.060048707, 0.03804525,
+              0.0663794, 0.04727275, -0.016112225, 0.09687414, -0.04424251, -0.028799534,
+              -0.01294642, 0.013026413, 0.022404836, 0.04713173, 0.06402557, 0.12130648,
+              0.06062839, 0.10218965, -0.0757528, -0.023806982, 0.12489501, -0.045460615,
+              0.09545599, 0.021262301, 0.03731495, -0.075220875, -0.0026194793, 0.0472452,
+              0.048499025, 0.12358729, 0.017998053, 0.013811017, -0.035893846, -0.051789004,
+              0.06182457, 0.05160056, 0.008895317, -0.12500942, 0.016453298, -0.08590811,
+              -0.071096726, 0.06987216, -0.036072273, -0.0053715096, -0.048762616, 0.00081640907,
+              -0.021502526, -0.061078615, 0.002485032, -0.032720752, 0.045743283, 0.038934175,
+              -0.024666062, 0.025897244, 0.10301431, -0.013001504, 0.04783332, -0.07114252,
+              0.046031926, 0.080549754, -0.10302451, 0.08449227, 0.028010191, -0.03697792
+            ],
+            "index": 0,
+            "object": "embedding"
+          }
+        ],
+        "model": "all-minilm:l6-v2",
+        "object": "list",
+        "usage": {
+          "prompt_tokens": 6,
+          "total_tokens": 6
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}

@@ -20,15 +20,15 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-04T22:55:11.15982Z",
+"created_at": "2025-09-03T17:36:17.894986Z",
 "done": true,
 "done_reason": "stop",
-"total_duration": 498612042,
-"load_duration": 71411834,
+"total_duration": 363397458,
+"load_duration": 86692791,
 "prompt_eval_count": 23,
-"prompt_eval_duration": 102000000,
+"prompt_eval_duration": 68658541,
 "eval_count": 6,
-"eval_duration": 323000000,
+"eval_duration": 207389084,
 "response": "Humans live on Earth.",
 "thinking": null,
 "context": null

tests/integration/recordings/responses/23506e73bb9e.json (new file, +422 lines)
@@ -0,0 +1,422 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/embeddings",
+    "headers": {},
+    "body": {
+      "model": "all-minilm:l6-v2",
+      "input": [
+        "This is a test file 1"
+      ],
+      "encoding_format": "float"
+    },
+    "endpoint": "/v1/embeddings",
+    "model": "all-minilm:l6-v2"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
+      "__data__": {
+        "data": [
+          {
+            "embedding": [
+              -0.055990793, 0.076004684, -0.09247725, 0.014340361, 0.058780864, -0.032434482,
+              0.020954052, 0.028818125, -0.06591213, 0.013541593, 0.12999941, 0.004603084,
+              -0.0069239275, -0.055457443, -0.047553156, -0.029139794, -0.12236376, -0.05360872,
+              -0.014706594, 0.05984688, 0.034442738, 0.02076038, -0.048697792, 0.0135388365,
+              0.058592733, -0.003076384, -0.031565297, 0.082541116, -0.031259205, -0.12057633,
+              0.038319625, 0.06574785, 0.06415721, 0.038382582, 0.12570712, 0.03108174,
+              0.10821103, -0.0019794356, -0.024704305, 0.028765837, 0.01268161, -0.039844505,
+              0.043253522, -0.015898596, -0.0135526005, -0.0050831717, -0.007911988, 0.039783813,
+              0.0036548872, -0.033632487, -0.058547974, 0.0048877494, -0.089586094, -0.010457663,
+              0.059202507, -0.020414542, 0.014278556, 0.013986488, -0.0046022516, 0.0383391,
+              0.0048145773, 0.029772853, -0.020863408, 0.018640704, 0.12422993, -0.023236223,
+              -0.040323637, -0.023598222, -0.007448043, -0.09083128, -0.16859712, 0.01012451,
+              -0.035808884, 0.010595173, -0.02050494, 0.0020821376, -0.10925222, 0.00793264,
+              0.048889533, -0.11391199, -0.06072707, -0.13435508, 0.0063265716, -0.008838073,
+              -0.03153269, 0.099169336, 0.055310693, 0.0068571265, -0.023463152, -0.0031599961,
+              0.036782328, 0.014336826, 0.022220163, 0.047114056, 0.007079763, 0.06806425,
+              0.01851431, 0.040882625, 0.055058856, 0.09488346, -0.015833577, -7.924328e-05,
+              0.010821554, 0.09177704, -0.07464829, -0.06471165, 0.07013805, -0.04499751,
+              0.057702336, -0.0260911, 0.006323043, -0.09500501, -0.010549514, -0.07887475,
+              0.039744847, -0.04154404, -0.055268157, 0.07540271, -0.04667509, 0.036143072,
+              0.080297194, -0.036381353, -0.03477274, 0.01701203, -0.047007203, -0.06519774,
+              0.062141683, -4.222482e-33, -0.0017580023, -0.09383388, -0.02982657, 0.1257841,
+              0.03802007, -0.03654342, 0.0060920226, 0.05906885, -0.11074452, 0.005664566,
+              -0.0259852, -0.074819505, 0.008342821, 0.027451068, -0.05248069, 0.02401768,
+              -0.004380289, 0.039321493, -0.04213744, -0.027290314, 0.054677974, 0.02707243,
+              -0.03329442, -0.060589895, -0.050737355, 0.017969057, -0.0035060972, -0.04666249,
+              0.073946096, 0.01333894, -0.0033873583, -0.046544433, -0.060105033, 0.03406923,
+              0.001542676, 0.039177947, 0.03989323, -0.012346489, -0.030511485, -0.0019157606,
+              -0.014608986, -0.012997742, 0.019522104, -0.022349002, 0.074362256, -0.053366993,
+              -0.023993475, 0.029225096, 0.027534606, 0.015111057, -0.020442221, 0.043327376,
+              0.019660354, 0.017330697, -0.0035011724, 0.019482937, -0.0003428041, 0.0004143988,
+              -0.005117252, 0.06624799, 0.027922852, 0.041020587, -0.067166425, 0.028737254,
+              -0.03478325, -0.055551115, -0.032713737, -0.08099247, 0.09216284, 0.06395264,
+              -0.049168136, -0.039908994, 0.036915958, -0.001602359, 0.00033041168, -0.026015632,
+              -0.005999889, 0.05474541, -0.09568287, -0.05186289, -0.048838183, -0.08639551,
+              -0.034023147, -0.033257127, -0.05651867, -0.051131375, 0.00809173, -0.08581851,
+              0.06507323, -0.085427366, 0.027997404, 0.029847065, -0.031673994, -0.08560956,
+              0.1017672, 2.1855676e-33, 0.01160785, 0.077607885, -0.017380483, 0.005239329,
+              0.0009684126, 0.06543702, 0.07256893, -0.044318836, -0.04749324, 0.14031002,
+              -0.025741624, 0.0057860985, 0.040946104, -0.054880083, 0.074413285, -0.023610368,
+              0.018364722, -0.060585637, -0.044149306, 0.0027854694, -0.04580664, 0.1172219,
+              0.10268574, 0.07907412, -0.0466143, 0.018618405, 0.029834948, 0.037265483,
+              0.02273822, -0.0026589038, 0.041726097, 0.06439532, -0.089163445, 0.018188318,
+              0.024064727, -0.096389584, 0.08642254, -0.05389359, 0.01923105, 0.045092683,
+              0.045125954, 0.09655961, 0.014908797, 0.059611585, 0.03066662, 0.05882299,
+              0.111484826, 0.016632542, 0.011590394, -0.023702666, -0.008617484, -0.055030316,
+              0.047606383, -0.014632687, -0.014156344, 0.069926, 0.032047603, 0.042642817,
+              -0.053942375, 0.031047028, 0.009216673, 0.033024028, -0.019033706, 0.005568194,
+              -0.014985451, -0.09193244, -0.03210824, 0.015367608, 0.029150328, 0.01250386,
+              -0.004827391, 0.023345906, -0.028271332, -0.08454125, 0.051068563, -0.0133641455,
+              -0.029022738, -0.02258452, 0.010884119, -0.009810021, 0.049751773, -0.0032637494,
+              -0.038813565, 0.027924104, 0.017925078, 0.005337612, 0.058691237, 0.09577674,
+              -0.014308608, 0.006972794, -0.02733344, 0.06912433, 0.05727631, 0.03206042,
+              0.0042422824, -1.6766318e-08, -0.036354303, -0.09146416, -0.026319364, -0.007941995,
+              -0.024127059, 0.09896698, -0.04723083, -0.03767135, -0.029419973, -0.022513283,
+              0.04125822, -0.0011487947, -0.05570366, 0.020679709, -0.038118906, -0.0524994,
+              -0.02624128, -0.05336954, -0.040593866, -0.0073642326, -0.0014442836, 0.02714257,
+              0.027141048, 0.00932513, -0.00026505854, 0.038233075, 0.037096914, 0.08405413,
+              -0.06340637, -0.014856458, 0.05038612, 0.06703033, 0.027668556, -0.04360097,
+              -0.012041474, 0.08500689, 0.111594744, 0.1046117, 0.019726463, -0.0003025109,
+              -0.04110389, 0.009575226, -0.05285304, -0.0026365265, -0.031144748, -0.08860188,
+              -0.06762232, -0.07451522, -0.053012833, -0.09560941, -0.05273455, 0.013032144,
+              0.0029190276, 0.041905046, -0.04522114, 0.016730292, 0.017214278, 0.021578068,
+              -0.03718778, 0.02353425, 0.052041385, 0.06444499, 0.02387539, -0.025236009
+            ],
+            "index": 0,
+            "object": "embedding"
+          }
+        ],
+        "model": "all-minilm:l6-v2",
+        "object": "list",
+        "usage": {
+          "prompt_tokens": 6,
+          "total_tokens": 6
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}

@@ -22,7 +22,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:01.887809Z",
+"created_at": "2025-09-03T17:37:50.436472Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -40,7 +40,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:01.942369Z",
+"created_at": "2025-09-03T17:37:50.478138Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -58,7 +58,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:01.99605Z",
+"created_at": "2025-09-03T17:37:50.519952Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -76,7 +76,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:02.049974Z",
+"created_at": "2025-09-03T17:37:50.561433Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -94,7 +94,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:02.102027Z",
+"created_at": "2025-09-03T17:37:50.603624Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -112,7 +112,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:02.158416Z",
+"created_at": "2025-09-03T17:37:50.645851Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -130,7 +130,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:02.211753Z",
+"created_at": "2025-09-03T17:37:50.688403Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -148,7 +148,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:02.265564Z",
+"created_at": "2025-09-03T17:37:50.72991Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -166,7 +166,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:02.31618Z",
+"created_at": "2025-09-03T17:37:50.771635Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -184,7 +184,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:02.370325Z",
+"created_at": "2025-09-03T17:37:50.813711Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -202,7 +202,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:02.424667Z",
+"created_at": "2025-09-03T17:37:50.856201Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -220,7 +220,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:02.47913Z",
+"created_at": "2025-09-03T17:37:50.899048Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -238,15 +238,15 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:02.536984Z",
+"created_at": "2025-09-03T17:37:50.94069Z",
 "done": true,
 "done_reason": "stop",
-"total_duration": 1042724125,
-"load_duration": 86161375,
+"total_duration": 688370708,
+"load_duration": 107469833,
 "prompt_eval_count": 399,
-"prompt_eval_duration": 305000000,
+"prompt_eval_duration": 74988334,
 "eval_count": 13,
-"eval_duration": 650000000,
+"eval_duration": 505216458,
 "response": "",
 "thinking": null,
 "context": null

@@ -22,7 +22,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:11.938867Z",
+"created_at": "2025-09-03T17:37:56.566151Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -40,7 +40,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:11.991247Z",
+"created_at": "2025-09-03T17:37:56.609308Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -58,7 +58,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.043953Z",
+"created_at": "2025-09-03T17:37:56.651314Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -76,7 +76,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.096001Z",
+"created_at": "2025-09-03T17:37:56.693185Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -94,7 +94,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.150454Z",
+"created_at": "2025-09-03T17:37:56.734643Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -112,7 +112,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.201249Z",
+"created_at": "2025-09-03T17:37:56.776343Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -130,7 +130,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.252534Z",
+"created_at": "2025-09-03T17:37:56.81705Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -148,7 +148,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.30063Z",
+"created_at": "2025-09-03T17:37:56.857959Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -166,7 +166,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.351034Z",
+"created_at": "2025-09-03T17:37:56.899424Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -184,7 +184,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.405032Z",
+"created_at": "2025-09-03T17:37:56.939218Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -202,7 +202,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.462645Z",
+"created_at": "2025-09-03T17:37:56.980065Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -220,7 +220,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.520337Z",
+"created_at": "2025-09-03T17:37:57.02214Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -238,7 +238,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.575809Z",
+"created_at": "2025-09-03T17:37:57.0628Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -256,7 +256,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.633724Z",
+"created_at": "2025-09-03T17:37:57.106061Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -274,7 +274,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.683133Z",
+"created_at": "2025-09-03T17:37:57.1492Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -292,7 +292,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.734309Z",
+"created_at": "2025-09-03T17:37:57.190075Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -310,7 +310,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.785917Z",
+"created_at": "2025-09-03T17:37:57.23178Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -328,7 +328,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.835705Z",
+"created_at": "2025-09-03T17:37:57.272738Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -346,7 +346,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.886509Z",
+"created_at": "2025-09-03T17:37:57.313855Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -364,7 +364,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.937134Z",
+"created_at": "2025-09-03T17:37:57.354964Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -382,7 +382,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:12.988532Z",
+"created_at": "2025-09-03T17:37:57.395971Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -400,7 +400,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.041798Z",
+"created_at": "2025-09-03T17:37:57.438471Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -418,7 +418,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.095443Z",
+"created_at": "2025-09-03T17:37:57.479796Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -436,7 +436,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.151402Z",
+"created_at": "2025-09-03T17:37:57.520641Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -454,7 +454,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.203462Z",
+"created_at": "2025-09-03T17:37:57.561511Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -472,7 +472,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.254567Z",
+"created_at": "2025-09-03T17:37:57.602875Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -490,7 +490,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.305865Z",
+"created_at": "2025-09-03T17:37:57.643406Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -508,7 +508,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.357658Z",
+"created_at": "2025-09-03T17:37:57.684279Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -526,7 +526,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.407773Z",
+"created_at": "2025-09-03T17:37:57.725699Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -544,7 +544,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.458919Z",
+"created_at": "2025-09-03T17:37:57.766658Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -562,7 +562,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.510456Z",
+"created_at": "2025-09-03T17:37:57.80738Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -580,7 +580,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.565948Z",
+"created_at": "2025-09-03T17:37:57.848466Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -598,7 +598,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.619155Z",
+"created_at": "2025-09-03T17:37:57.889056Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -616,7 +616,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.672754Z",
+"created_at": "2025-09-03T17:37:57.931554Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -634,7 +634,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.729473Z",
+"created_at": "2025-09-03T17:37:57.974754Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -652,7 +652,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.788666Z",
+"created_at": "2025-09-03T17:37:58.016978Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -670,7 +670,7 @@
 "__type__": "ollama._types.GenerateResponse",
 "__data__": {
 "model": "llama3.2:3b-instruct-fp16",
-"created_at": "2025-08-01T23:14:13.850575Z",
+"created_at": "2025-09-03T17:37:58.057942Z",
 "done": false,
 "done_reason": null,
 "total_duration": null,
@@ -688,7 +688,7 @@
 "__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:13.904807Z",
|
"created_at": "2025-09-03T17:37:58.099015Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -706,7 +706,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:13.958524Z",
|
"created_at": "2025-09-03T17:37:58.140531Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -724,7 +724,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.011742Z",
|
"created_at": "2025-09-03T17:37:58.181382Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -742,7 +742,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.064933Z",
|
"created_at": "2025-09-03T17:37:58.223318Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -760,7 +760,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.116454Z",
|
"created_at": "2025-09-03T17:37:58.26358Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -778,7 +778,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.172682Z",
|
"created_at": "2025-09-03T17:37:58.305496Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -796,7 +796,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.227654Z",
|
"created_at": "2025-09-03T17:37:58.347254Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -814,7 +814,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.282068Z",
|
"created_at": "2025-09-03T17:37:58.390044Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -832,7 +832,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.334565Z",
|
"created_at": "2025-09-03T17:37:58.430867Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -850,7 +850,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.383532Z",
|
"created_at": "2025-09-03T17:37:58.471376Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -868,7 +868,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.432138Z",
|
"created_at": "2025-09-03T17:37:58.51208Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -886,7 +886,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.480995Z",
|
"created_at": "2025-09-03T17:37:58.553226Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -904,7 +904,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.531968Z",
|
"created_at": "2025-09-03T17:37:58.594787Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -922,7 +922,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.584044Z",
|
"created_at": "2025-09-03T17:37:58.63466Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -940,7 +940,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.635691Z",
|
"created_at": "2025-09-03T17:37:58.674628Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -958,7 +958,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.68837Z",
|
"created_at": "2025-09-03T17:37:58.714616Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -976,7 +976,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.73985Z",
|
"created_at": "2025-09-03T17:37:58.754906Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -994,7 +994,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.792412Z",
|
"created_at": "2025-09-03T17:37:58.795048Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1012,7 +1012,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.845872Z",
|
"created_at": "2025-09-03T17:37:58.835297Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1030,7 +1030,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.900102Z",
|
"created_at": "2025-09-03T17:37:58.875738Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1048,7 +1048,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:14.954589Z",
|
"created_at": "2025-09-03T17:37:58.91604Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1066,7 +1066,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.006629Z",
|
"created_at": "2025-09-03T17:37:58.956596Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1084,7 +1084,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.058561Z",
|
"created_at": "2025-09-03T17:37:58.996664Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1102,7 +1102,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.111954Z",
|
"created_at": "2025-09-03T17:37:59.037796Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1120,7 +1120,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.169173Z",
|
"created_at": "2025-09-03T17:37:59.078586Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1138,7 +1138,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.222569Z",
|
"created_at": "2025-09-03T17:37:59.119448Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1156,7 +1156,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.275795Z",
|
"created_at": "2025-09-03T17:37:59.160318Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1174,7 +1174,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.3327Z",
|
"created_at": "2025-09-03T17:37:59.201852Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1192,7 +1192,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.389931Z",
|
"created_at": "2025-09-03T17:37:59.243763Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1210,7 +1210,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.442349Z",
|
"created_at": "2025-09-03T17:37:59.284948Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1228,7 +1228,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.494175Z",
|
"created_at": "2025-09-03T17:37:59.325598Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1246,7 +1246,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.545764Z",
|
"created_at": "2025-09-03T17:37:59.366289Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1264,7 +1264,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.599099Z",
|
"created_at": "2025-09-03T17:37:59.406764Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1282,7 +1282,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.649852Z",
|
"created_at": "2025-09-03T17:37:59.447922Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1300,7 +1300,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.698222Z",
|
"created_at": "2025-09-03T17:37:59.488486Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1318,7 +1318,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.747168Z",
|
"created_at": "2025-09-03T17:37:59.529Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1336,7 +1336,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.797196Z",
|
"created_at": "2025-09-03T17:37:59.569417Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1354,7 +1354,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.845587Z",
|
"created_at": "2025-09-03T17:37:59.610542Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1372,7 +1372,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.897171Z",
|
"created_at": "2025-09-03T17:37:59.651411Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1390,7 +1390,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.944524Z",
|
"created_at": "2025-09-03T17:37:59.69241Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1408,7 +1408,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:15.994467Z",
|
"created_at": "2025-09-03T17:37:59.732339Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1426,7 +1426,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.045224Z",
|
"created_at": "2025-09-03T17:37:59.772462Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1444,7 +1444,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.093853Z",
|
"created_at": "2025-09-03T17:37:59.812507Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1462,7 +1462,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.144847Z",
|
"created_at": "2025-09-03T17:37:59.852762Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1480,7 +1480,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.197888Z",
|
"created_at": "2025-09-03T17:37:59.892984Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1498,7 +1498,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.250854Z",
|
"created_at": "2025-09-03T17:37:59.933555Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1516,7 +1516,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.301995Z",
|
"created_at": "2025-09-03T17:37:59.973778Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1534,7 +1534,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.352508Z",
|
"created_at": "2025-09-03T17:38:00.014923Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1552,7 +1552,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.40259Z",
|
"created_at": "2025-09-03T17:38:00.057464Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1570,7 +1570,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.453514Z",
|
"created_at": "2025-09-03T17:38:00.09902Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1588,7 +1588,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.50378Z",
|
"created_at": "2025-09-03T17:38:00.140492Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1606,7 +1606,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.554395Z",
|
"created_at": "2025-09-03T17:38:00.180239Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1624,7 +1624,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.605795Z",
|
"created_at": "2025-09-03T17:38:00.220364Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1642,7 +1642,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.656313Z",
|
"created_at": "2025-09-03T17:38:00.26097Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1660,7 +1660,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.706438Z",
|
"created_at": "2025-09-03T17:38:00.301228Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1678,7 +1678,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.756444Z",
|
"created_at": "2025-09-03T17:38:00.341631Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1696,7 +1696,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.807687Z",
|
"created_at": "2025-09-03T17:38:00.383006Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1714,7 +1714,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.85835Z",
|
"created_at": "2025-09-03T17:38:00.423509Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1732,7 +1732,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.909311Z",
|
"created_at": "2025-09-03T17:38:00.464702Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1750,7 +1750,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:16.959327Z",
|
"created_at": "2025-09-03T17:38:00.505914Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1768,7 +1768,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:17.010211Z",
|
"created_at": "2025-09-03T17:38:00.546505Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1786,7 +1786,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:17.061365Z",
|
"created_at": "2025-09-03T17:38:00.587839Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -1804,15 +1804,15 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-01T23:14:17.111956Z",
|
"created_at": "2025-09-03T17:38:00.629018Z",
|
||||||
"done": true,
|
"done": true,
|
||||||
"done_reason": "stop",
|
"done_reason": "stop",
|
||||||
"total_duration": 5499672375,
|
"total_duration": 4303339291,
|
||||||
"load_duration": 58161750,
|
"load_duration": 156231250,
|
||||||
"prompt_eval_count": 36,
|
"prompt_eval_count": 36,
|
||||||
"prompt_eval_duration": 266000000,
|
"prompt_eval_duration": 81909875,
|
||||||
"eval_count": 100,
|
"eval_count": 100,
|
||||||
"eval_duration": 5174000000,
|
"eval_duration": 4064559292,
|
||||||
"response": "",
|
"response": "",
|
||||||
"thinking": null,
|
"thinking": null,
|
||||||
"context": null
|
"context": null
|
||||||
|
|
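Every hunk above touches only volatile fields: the `created_at` wall-clock timestamp of each streamed chunk, and, in the final chunk, the nanosecond duration counters. The substantive payload (`model`, `done`, `done_reason`, `response`) is identical on both sides. As a minimal sketch of why such fields churn on every re-record, the snippet below masks them before comparing two captures; the `normalize` helper and the `VOLATILE_FIELDS` set are illustrative assumptions, not part of llama-stack.

```python
# Hypothetical helper (not from llama-stack): strip fields that change on
# every re-recording before diffing two captured GenerateResponse payloads.
from typing import Any

VOLATILE_FIELDS = {
    "created_at",            # wall-clock timestamp of the chunk
    "total_duration",        # nanosecond timings; vary run to run
    "load_duration",
    "prompt_eval_duration",
    "eval_duration",
}


def normalize(payload: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of a recorded payload without volatile fields."""
    return {k: v for k, v in payload.items() if k not in VOLATILE_FIELDS}


# Two chunks from the diff above: identical except for the timestamp.
old = {"model": "llama3.2:3b-instruct-fp16",
       "created_at": "2025-08-01T23:14:13.151402Z",
       "done": False, "done_reason": None, "total_duration": None}
new = {"model": "llama3.2:3b-instruct-fp16",
       "created_at": "2025-09-03T17:37:57.520641Z",
       "done": False, "done_reason": None, "total_duration": None}
assert normalize(old) == normalize(new)
```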
File diff suppressed because it is too large
@@ -1,7 +1,7 @@
 {
 "request": {
 "method": "POST",
-"url": "http://localhost:11434/v1/v1/chat/completions",
+"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
 "headers": {},
 "body": {
 "model": "llama3.2:3b-instruct-fp16",
@@ -22,14 +22,14 @@
 "body": {
 "__type__": "openai.types.chat.chat_completion.ChatCompletion",
 "__data__": {
-"id": "chatcmpl-339",
+"id": "chatcmpl-442",
 "choices": [
 {
 "finish_reason": "length",
 "index": 0,
 "logprobs": null,
 "message": {
-"content": "I can guide you through the process, but please note that this is not an official OpenAI API call. OpenAI's API terms and conditions prohibit using their models for malicious purposes.\n\nTo test a model like \"text-temperature\" with a temperature of 0 (i.e., no noise or randomness), we'll need to use a third-party library that connects to the OpenAI API. One such library is `transformers`.\n\nFirst, you need to install the `transformers` and `",
+"content": "I can guide you on how to use the `test-temperature` parameter with OpenAI's API, but please note that using a temperature of 0 may not produce meaningful results. Temperature is a hyperparameter that controls the level of randomness in the model's output.\n\nOpenAI's API uses a variant of the GPT-3 model, which is trained on a large corpus of text data. The `test-temperature` parameter allows you to adjust the level of randomness in the model's output",
 "refusal": null,
 "role": "assistant",
 "annotations": null,
@@ -39,7 +39,7 @@
 }
 }
 ],
-"created": 1754510065,
+"created": 1756921254,
 "model": "llama3.2:3b-instruct-fp16",
 "object": "chat.completion",
 "service_tier": null,
Some files were not shown because too many files have changed in this diff