forked from phoenix-oss/llama-stack-mirror
Merge branch 'main' into eval_api_final
This commit is contained in:
commit
46f2ba5910
46 changed files with 1030 additions and 423 deletions
|
@ -1,9 +0,0 @@
|
|||
---
|
||||
description: General rules always applicable across the project
|
||||
globs:
|
||||
alwaysApply: true
|
||||
---
|
||||
# Style
|
||||
|
||||
- Comments must add value to code. Don't write filler comments explaining what you are doing next; they just add noise.
|
||||
- Add a comment to clarify surprising behavior which would not be obvious. Good variable naming and clear code organization is more important.
|
17
.github/dependabot.yml
vendored
17
.github/dependabot.yml
vendored
|
@ -5,4 +5,19 @@ updates:
|
|||
- package-ecosystem: "github-actions"
|
||||
directory: "/" # Will use the default workflow location of `.github/workflows`
|
||||
schedule:
|
||||
interval: "daily"
|
||||
interval: "weekly"
|
||||
day: "saturday"
|
||||
commit-message:
|
||||
prefix: chore(github-deps)
|
||||
- package-ecosystem: "uv"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "saturday"
|
||||
# ignore all non-security updates: https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#open-pull-requests-limit
|
||||
open-pull-requests-limit: 0
|
||||
labels:
|
||||
- type/dependencies
|
||||
- python
|
||||
commit-message:
|
||||
prefix: chore(python-deps)
|
||||
|
|
20
.github/workflows/integration-tests.yml
vendored
20
.github/workflows/integration-tests.yml
vendored
|
@ -1,4 +1,4 @@
|
|||
name: Integration tests
|
||||
name: Integration Tests
|
||||
|
||||
on:
|
||||
push:
|
||||
|
@ -15,8 +15,14 @@ on:
|
|||
- '.github/workflows/integration-tests.yml' # This workflow
|
||||
|
||||
jobs:
|
||||
ollama:
|
||||
test-matrix:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
# Listing tests manually since some of them currently fail
|
||||
# TODO: generate matrix list from tests/integration when fixed
|
||||
test-type: [inference, datasets, inspect, scoring, post_training, providers]
|
||||
fail-fast: false # we want to run all tests regardless of failure
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
|
@ -43,6 +49,8 @@ jobs:
|
|||
run: |
|
||||
uv sync --extra dev --extra test
|
||||
uv pip install ollama faiss-cpu
|
||||
# always test against the latest version of the client
|
||||
uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
|
||||
uv pip install -e .
|
||||
|
||||
- name: Wait for Ollama to start
|
||||
|
@ -72,17 +80,17 @@ jobs:
|
|||
echo "Waiting for Llama Stack server..."
|
||||
for i in {1..30}; do
|
||||
if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
|
||||
echo " Llama Stack server is up!"
|
||||
echo "Llama Stack server is up!"
|
||||
exit 0
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
echo " Llama Stack server failed to start"
|
||||
echo "Llama Stack server failed to start"
|
||||
cat server.log
|
||||
exit 1
|
||||
|
||||
- name: Run Inference Integration Tests
|
||||
- name: Run Integration Tests
|
||||
env:
|
||||
INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
|
||||
run: |
|
||||
uv run pytest -v tests/integration/inference --stack-config=ollama --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2
|
||||
uv run pytest -v tests/integration/${{ matrix.test-type }} --stack-config=ollama --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2
|
||||
|
|
|
@ -86,7 +86,7 @@ LLAMA_STACK_CONFIG=
|
|||
|
||||
And then use this dotenv file when running client SDK tests via the following:
|
||||
```bash
|
||||
uv run --env-file .env -- pytest -v tests/api/inference/test_text_inference.py
|
||||
uv run --env-file .env -- pytest -v tests/integration/inference/test_text_inference.py
|
||||
```
|
||||
|
||||
## Pre-commit Hooks
|
||||
|
|
|
@ -4,7 +4,8 @@
|
|||
[](https://pypi.org/project/llama-stack/)
|
||||
[](https://github.com/meta-llama/llama-stack/blob/main/LICENSE)
|
||||
[](https://discord.gg/llama-stack)
|
||||

|
||||
[](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
|
||||
[](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)
|
||||
|
||||
[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb)
|
||||
|
||||
|
|
64
docs/_static/llama-stack-spec.html
vendored
64
docs/_static/llama-stack-spec.html
vendored
|
@ -1126,6 +1126,7 @@
|
|||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Benchmark"
|
||||
"$ref": "#/components/schemas/Benchmark"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1168,14 +1169,7 @@
|
|||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/Dataset"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
"$ref": "#/components/schemas/Dataset"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1250,14 +1244,7 @@
|
|||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/Model"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
"$ref": "#/components/schemas/Model"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1332,14 +1319,7 @@
|
|||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/Shield"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
"$ref": "#/components/schemas/Shield"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1642,14 +1622,7 @@
|
|||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
"$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1691,14 +1664,7 @@
|
|||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/PostTrainingJobStatusResponse"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
"$ref": "#/components/schemas/PostTrainingJobStatusResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1773,14 +1739,7 @@
|
|||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/FileUploadResponse"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
"$ref": "#/components/schemas/FileUploadResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1882,14 +1841,7 @@
|
|||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/VectorDB"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
"$ref": "#/components/schemas/VectorDB"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
29
docs/_static/llama-stack-spec.yaml
vendored
29
docs/_static/llama-stack-spec.yaml
vendored
|
@ -774,6 +774,7 @@ paths:
|
|||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Benchmark'
|
||||
$ref: '#/components/schemas/Benchmark'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
|
@ -802,9 +803,7 @@ paths:
|
|||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/Dataset'
|
||||
- type: 'null'
|
||||
$ref: '#/components/schemas/Dataset'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
|
@ -855,9 +854,7 @@ paths:
|
|||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/Model'
|
||||
- type: 'null'
|
||||
$ref: '#/components/schemas/Model'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
|
@ -908,9 +905,7 @@ paths:
|
|||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/Shield'
|
||||
- type: 'null'
|
||||
$ref: '#/components/schemas/Shield'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
|
@ -1112,9 +1107,7 @@ paths:
|
|||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
|
||||
- type: 'null'
|
||||
$ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
|
@ -1142,9 +1135,7 @@ paths:
|
|||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/PostTrainingJobStatusResponse'
|
||||
- type: 'null'
|
||||
$ref: '#/components/schemas/PostTrainingJobStatusResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
|
@ -1195,9 +1186,7 @@ paths:
|
|||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/FileUploadResponse'
|
||||
- type: 'null'
|
||||
$ref: '#/components/schemas/FileUploadResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
|
@ -1266,9 +1255,7 @@ paths:
|
|||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/VectorDB'
|
||||
- type: 'null'
|
||||
$ref: '#/components/schemas/VectorDB'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import sys
|
||||
import fire
|
||||
import ruamel.yaml as yaml
|
||||
|
||||
|
@ -21,7 +21,7 @@ from llama_stack.distribution.stack import LlamaStack # noqa: E402
|
|||
|
||||
from .pyopenapi.options import Options # noqa: E402
|
||||
from .pyopenapi.specification import Info, Server # noqa: E402
|
||||
from .pyopenapi.utility import Specification # noqa: E402
|
||||
from .pyopenapi.utility import Specification, validate_api_method_return_types # noqa: E402
|
||||
|
||||
|
||||
def str_presenter(dumper, data):
|
||||
|
@ -39,6 +39,14 @@ def main(output_dir: str):
|
|||
if not output_dir.exists():
|
||||
raise ValueError(f"Directory {output_dir} does not exist")
|
||||
|
||||
# Validate API protocols before generating spec
|
||||
print("Validating API method return types...")
|
||||
return_type_errors = validate_api_method_return_types()
|
||||
if return_type_errors:
|
||||
print("\nAPI Method Return Type Validation Errors:\n")
|
||||
for error in return_type_errors:
|
||||
print(error)
|
||||
sys.exit(1)
|
||||
now = str(datetime.now())
|
||||
print(
|
||||
"Converting the spec to YAML (openapi.yaml) and HTML (openapi.html) at " + now
|
||||
|
|
|
@ -6,16 +6,19 @@
|
|||
|
||||
import json
|
||||
import typing
|
||||
import inspect
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import TextIO
|
||||
from typing import Any, Dict, List, Optional, Protocol, Type, Union, get_type_hints, get_origin, get_args
|
||||
|
||||
from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
|
||||
from llama_stack.distribution.resolver import api_protocol_map
|
||||
|
||||
from .generator import Generator
|
||||
from .options import Options
|
||||
from .specification import Document
|
||||
|
||||
|
||||
THIS_DIR = Path(__file__).parent
|
||||
|
||||
|
||||
|
@ -114,3 +117,37 @@ class Specification:
|
|||
)
|
||||
|
||||
f.write(html)
|
||||
|
||||
def is_optional_type(type_: Any) -> bool:
    """Check whether a type annotation is a union that includes ``None``.

    Recognizes ``Optional[X]`` and ``Union[X, None]`` as well as the PEP 604
    spelling ``X | None``. On Python versions that distinguish the two, the
    PEP 604 form reports ``types.UnionType`` as its origin instead of
    ``typing.Union``, so both origins are accepted.

    :param type_: The annotation to inspect (typically a resolved return hint).
    :returns: True if the annotation is a union with ``None`` as a member.
    """
    import types  # local import so the block does not depend on file-level imports

    origin = get_origin(type_)
    # get_origin() never returns ``Optional`` itself: Optional[X] is just
    # Union[X, None], so only the union origins need to be checked.
    # ``types.UnionType`` does not exist before Python 3.10; fall back to Union.
    union_origins = (Union, getattr(types, "UnionType", Union))
    is_union = any(origin is candidate for candidate in union_origins)
    return is_union and type(None) in get_args(type_)
|
||||
|
||||
|
||||
def validate_api_method_return_types() -> List[str]:
    """Collect return-annotation problems for the GET web methods of every API protocol.

    Walks each protocol returned by ``api_protocol_map()``, looks at the
    functions that carry a ``__webmethod__`` attribute whose ``method`` is
    ``"GET"``, and records an error message when such a function either has
    no return annotation or is annotated with an Optional return type.

    :returns: A list of human-readable error strings; empty when nothing was flagged.
    """
    problems = []

    for api_name, api_protocol in api_protocol_map().items():
        for fn_name, fn in inspect.getmembers(api_protocol, predicate=inspect.isfunction):
            # Skip anything that is not a web method, and web methods other than GET.
            if not hasattr(fn, '__webmethod__') or fn.__webmethod__.method != "GET":
                continue

            annotations = get_type_hints(fn)

            if 'return' in annotations:
                if is_optional_type(annotations['return']):
                    problems.append(f"Method {api_name}.{fn_name} returns Optional type")
            else:
                problems.append(f"Method {api_name}.{fn_name} has no return type annotation")

    return problems
|
||||
|
|
|
@ -185,8 +185,12 @@ llama stack build --config llama_stack/templates/ollama/build.yaml
|
|||
:::
|
||||
|
||||
:::{tab-item} Building Container
|
||||
> [!TIP]
|
||||
> Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podman` in your environment to use Podman.
|
||||
|
||||
```{admonition} Podman Alternative
|
||||
:class: tip
|
||||
|
||||
Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podman` in your environment to use Podman.
|
||||
```
|
||||
|
||||
To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type.
|
||||
|
||||
|
|
|
@ -92,6 +92,8 @@ Interactive pages for users to play with and explore Llama Stack API capabilitie
|
|||
|
||||
## Starting the Llama Stack Playground
|
||||
|
||||
### Llama CLI
|
||||
|
||||
To start the Llama Stack Playground, run the following commands:
|
||||
|
||||
1. Start up the Llama Stack API server
|
||||
|
@ -107,3 +109,28 @@ cd llama_stack/distribution/ui
|
|||
pip install -r requirements.txt
|
||||
streamlit run app.py
|
||||
```
|
||||
|
||||
### Docker
|
||||
|
||||
Playground can also be started in a docker image:
|
||||
|
||||
```sh
|
||||
export LLAMA_STACK_URL=http://localhost:11434
|
||||
|
||||
docker run \
|
||||
-p 8501:8501 \
|
||||
-e LLAMA_STACK_ENDPOINT=$LLAMA_STACK_URL \
|
||||
quay.io/jland/llama-stack-playground
|
||||
```
|
||||
|
||||
## Configurable Environment Variables
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Environment Variable | Description | Default Value |
|
||||
|----------------------------|------------------------------------|---------------------------|
|
||||
| LLAMA_STACK_ENDPOINT | The endpoint for the Llama Stack | http://localhost:8321 |
|
||||
| FIREWORKS_API_KEY | API key for Fireworks provider | (empty string) |
|
||||
| TOGETHER_API_KEY | API key for Together provider | (empty string) |
|
||||
| SAMBANOVA_API_KEY | API key for SambaNova provider | (empty string) |
|
||||
| OPENAI_API_KEY | API key for OpenAI provider | (empty string) |
|
||||
|
|
|
@ -3,21 +3,36 @@ orphan: true
|
|||
---
|
||||
# Qdrant
|
||||
|
||||
[Qdrant](https://qdrant.tech/documentation/) is a remote vector database provider for Llama Stack. It
|
||||
[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
|
||||
allows you to store and query vectors directly in memory.
|
||||
That means you'll get fast and efficient vector retrieval.
|
||||
|
||||
> By default, Qdrant stores vectors in RAM, delivering incredibly fast access for datasets that fit comfortably in
|
||||
> memory. But when your dataset exceeds RAM capacity, Qdrant offers Memmap as an alternative.
|
||||
>
|
||||
> \[[An Introduction to Vector Databases](https://qdrant.tech/articles/what-is-a-vector-database/)\]
|
||||
|
||||
|
||||
|
||||
## Features
|
||||
|
||||
- Easy to use
|
||||
- Lightweight and easy to use
|
||||
- Fully integrated with Llama Stack
|
||||
- Apache 2.0 license terms
|
||||
- Store embeddings and their metadata
|
||||
- Supports search by
|
||||
[Keyword](https://qdrant.tech/articles/qdrant-introduces-full-text-filters-and-indexes/)
|
||||
and [Hybrid](https://qdrant.tech/articles/hybrid-search/#building-a-hybrid-search-system-in-qdrant) search
|
||||
- [Multilingual and Multimodal retrieval](https://qdrant.tech/documentation/multimodal-search/)
|
||||
- [Metadata filtering](https://qdrant.tech/articles/vector-search-filtering/)
|
||||
- [GPU support](https://qdrant.tech/documentation/guides/running-with-gpu/)
|
||||
|
||||
## Usage
|
||||
|
||||
To use Qdrant in your Llama Stack project, follow these steps:
|
||||
|
||||
1. Install the necessary dependencies.
|
||||
2. Configure your Llama Stack project to use Faiss.
|
||||
2. Configure your Llama Stack project to use Qdrant.
|
||||
3. Start storing and querying vectors.
|
||||
|
||||
## Installation
|
||||
|
|
|
@ -201,7 +201,7 @@ class Datasets(Protocol):
|
|||
async def get_dataset(
|
||||
self,
|
||||
dataset_id: str,
|
||||
) -> Optional[Dataset]: ...
|
||||
) -> Dataset: ...
|
||||
|
||||
@webmethod(route="/datasets", method="GET")
|
||||
async def list_datasets(self) -> ListDatasetsResponse: ...
|
||||
|
|
|
@ -117,7 +117,7 @@ class Eval(Protocol):
|
|||
"""
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
|
||||
async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]:
|
||||
async def job_status(self, benchmark_id: str, job_id: str) -> JobStatus:
|
||||
"""Get the status of a job.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||
|
|
|
@ -115,7 +115,7 @@ class Files(Protocol):
|
|||
async def get_upload_session_info(
|
||||
self,
|
||||
upload_id: str,
|
||||
) -> Optional[FileUploadResponse]:
|
||||
) -> FileUploadResponse:
|
||||
"""
|
||||
Returns information about an existing upload session
|
||||
|
||||
|
|
|
@ -66,7 +66,7 @@ class Models(Protocol):
|
|||
async def get_model(
|
||||
self,
|
||||
model_id: str,
|
||||
) -> Optional[Model]: ...
|
||||
) -> Model: ...
|
||||
|
||||
@webmethod(route="/models", method="POST")
|
||||
async def register_model(
|
||||
|
|
|
@ -202,10 +202,10 @@ class PostTraining(Protocol):
|
|||
async def get_training_jobs(self) -> ListPostTrainingJobsResponse: ...
|
||||
|
||||
@webmethod(route="/post-training/job/status", method="GET")
|
||||
async def get_training_job_status(self, job_uuid: str) -> Optional[PostTrainingJobStatusResponse]: ...
|
||||
async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse: ...
|
||||
|
||||
@webmethod(route="/post-training/job/cancel", method="POST")
|
||||
async def cancel_training_job(self, job_uuid: str) -> None: ...
|
||||
|
||||
@webmethod(route="/post-training/job/artifacts", method="GET")
|
||||
async def get_training_job_artifacts(self, job_uuid: str) -> Optional[PostTrainingJobArtifactsResponse]: ...
|
||||
async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse: ...
|
||||
|
|
|
@ -135,7 +135,7 @@ class ScoringFunctions(Protocol):
|
|||
async def list_scoring_functions(self) -> ListScoringFunctionsResponse: ...
|
||||
|
||||
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET")
|
||||
async def get_scoring_function(self, scoring_fn_id: str, /) -> Optional[ScoringFn]: ...
|
||||
async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn: ...
|
||||
|
||||
@webmethod(route="/scoring-functions", method="POST")
|
||||
async def register_scoring_function(
|
||||
|
|
|
@ -49,7 +49,7 @@ class Shields(Protocol):
|
|||
async def list_shields(self) -> ListShieldsResponse: ...
|
||||
|
||||
@webmethod(route="/shields/{identifier:path}", method="GET")
|
||||
async def get_shield(self, identifier: str) -> Optional[Shield]: ...
|
||||
async def get_shield(self, identifier: str) -> Shield: ...
|
||||
|
||||
@webmethod(route="/shields", method="POST")
|
||||
async def register_shield(
|
||||
|
|
|
@ -50,7 +50,7 @@ class VectorDBs(Protocol):
|
|||
async def get_vector_db(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
) -> Optional[VectorDB]: ...
|
||||
) -> VectorDB: ...
|
||||
|
||||
@webmethod(route="/vector-dbs", method="POST")
|
||||
async def register_vector_db(
|
||||
|
|
|
@ -219,8 +219,11 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
|
|||
async def list_models(self) -> ListModelsResponse:
|
||||
return ListModelsResponse(data=await self.get_all_with_type("model"))
|
||||
|
||||
async def get_model(self, model_id: str) -> Optional[Model]:
|
||||
return await self.get_object_by_identifier("model", model_id)
|
||||
async def get_model(self, model_id: str) -> Model:
|
||||
model = await self.get_object_by_identifier("model", model_id)
|
||||
if model is None:
|
||||
raise ValueError(f"Model '{model_id}' not found")
|
||||
return model
|
||||
|
||||
async def register_model(
|
||||
self,
|
||||
|
@ -267,8 +270,11 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
|
|||
async def list_shields(self) -> ListShieldsResponse:
|
||||
return ListShieldsResponse(data=await self.get_all_with_type(ResourceType.shield.value))
|
||||
|
||||
async def get_shield(self, identifier: str) -> Optional[Shield]:
|
||||
return await self.get_object_by_identifier("shield", identifier)
|
||||
async def get_shield(self, identifier: str) -> Shield:
|
||||
shield = await self.get_object_by_identifier("shield", identifier)
|
||||
if shield is None:
|
||||
raise ValueError(f"Shield '{identifier}' not found")
|
||||
return shield
|
||||
|
||||
async def register_shield(
|
||||
self,
|
||||
|
@ -303,8 +309,11 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
|
|||
async def list_vector_dbs(self) -> ListVectorDBsResponse:
|
||||
return ListVectorDBsResponse(data=await self.get_all_with_type("vector_db"))
|
||||
|
||||
async def get_vector_db(self, vector_db_id: str) -> Optional[VectorDB]:
|
||||
return await self.get_object_by_identifier("vector_db", vector_db_id)
|
||||
async def get_vector_db(self, vector_db_id: str) -> VectorDB:
|
||||
vector_db = await self.get_object_by_identifier("vector_db", vector_db_id)
|
||||
if vector_db is None:
|
||||
raise ValueError(f"Vector DB '{vector_db_id}' not found")
|
||||
return vector_db
|
||||
|
||||
async def register_vector_db(
|
||||
self,
|
||||
|
@ -355,8 +364,11 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
|
|||
async def list_datasets(self) -> ListDatasetsResponse:
|
||||
return ListDatasetsResponse(data=await self.get_all_with_type(ResourceType.dataset.value))
|
||||
|
||||
async def get_dataset(self, dataset_id: str) -> Optional[Dataset]:
|
||||
return await self.get_object_by_identifier("dataset", dataset_id)
|
||||
async def get_dataset(self, dataset_id: str) -> Dataset:
|
||||
dataset = await self.get_object_by_identifier("dataset", dataset_id)
|
||||
if dataset is None:
|
||||
raise ValueError(f"Dataset '{dataset_id}' not found")
|
||||
return dataset
|
||||
|
||||
async def register_dataset(
|
||||
self,
|
||||
|
@ -408,8 +420,11 @@ class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions):
|
|||
async def list_scoring_functions(self) -> ListScoringFunctionsResponse:
|
||||
return ListScoringFunctionsResponse(data=await self.get_all_with_type(ResourceType.scoring_function.value))
|
||||
|
||||
async def get_scoring_function(self, scoring_fn_id: str) -> Optional[ScoringFn]:
|
||||
return await self.get_object_by_identifier("scoring_function", scoring_fn_id)
|
||||
async def get_scoring_function(self, scoring_fn_id: str) -> ScoringFn:
|
||||
scoring_fn = await self.get_object_by_identifier("scoring_function", scoring_fn_id)
|
||||
if scoring_fn is None:
|
||||
raise ValueError(f"Scoring function '{scoring_fn_id}' not found")
|
||||
return scoring_fn
|
||||
|
||||
async def register_scoring_function(
|
||||
self,
|
||||
|
@ -445,8 +460,11 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
|
|||
async def list_benchmarks(self) -> ListBenchmarksResponse:
|
||||
return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark"))
|
||||
|
||||
async def get_benchmark(self, benchmark_id: str) -> Optional[Benchmark]:
|
||||
return await self.get_object_by_identifier("benchmark", benchmark_id)
|
||||
async def get_benchmark(self, benchmark_id: str) -> Benchmark:
|
||||
benchmark = await self.get_object_by_identifier("benchmark", benchmark_id)
|
||||
if benchmark is None:
|
||||
raise ValueError(f"Benchmark '{benchmark_id}' not found")
|
||||
return benchmark
|
||||
|
||||
async def register_benchmark(
|
||||
self,
|
||||
|
@ -490,7 +508,10 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
|
|||
return ListToolGroupsResponse(data=await self.get_all_with_type("tool_group"))
|
||||
|
||||
async def get_tool_group(self, toolgroup_id: str) -> ToolGroup:
|
||||
return await self.get_object_by_identifier("tool_group", toolgroup_id)
|
||||
tool_group = await self.get_object_by_identifier("tool_group", toolgroup_id)
|
||||
if tool_group is None:
|
||||
raise ValueError(f"Tool group '{toolgroup_id}' not found")
|
||||
return tool_group
|
||||
|
||||
async def get_tool(self, tool_name: str) -> Tool:
|
||||
return await self.get_object_by_identifier("tool", tool_name)
|
||||
|
|
11
llama_stack/distribution/ui/Containerfile
Normal file
11
llama_stack/distribution/ui/Containerfile
Normal file
|
@ -0,0 +1,11 @@
|
|||
# More info on playground configuration can be found here:
|
||||
# https://llama-stack.readthedocs.io/en/latest/playground
|
||||
|
||||
FROM python:3.9-slim
|
||||
WORKDIR /app
|
||||
COPY . /app/
|
||||
RUN /usr/local/bin/python -m pip install --upgrade pip && \
|
||||
/usr/local/bin/pip3 install -r requirements.txt
|
||||
EXPOSE 8501
|
||||
|
||||
ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
|
@ -40,3 +40,13 @@ cd llama_stack/distribution/ui
|
|||
pip install -r requirements.txt
|
||||
streamlit run app.py
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Environment Variable | Description | Default Value |
|
||||
|----------------------------|------------------------------------|---------------------------|
|
||||
| LLAMA_STACK_ENDPOINT | The endpoint for the Llama Stack | http://localhost:8321 |
|
||||
| FIREWORKS_API_KEY | API key for Fireworks provider | (empty string) |
|
||||
| TOGETHER_API_KEY | API key for Together provider | (empty string) |
|
||||
| SAMBANOVA_API_KEY | API key for SambaNova provider | (empty string) |
|
||||
| OPENAI_API_KEY | API key for OpenAI provider | (empty string) |
|
||||
|
|
|
@ -614,118 +614,133 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
logger.debug(f"completion message with EOM (iter: {n_iter}): {str(message)}")
|
||||
input_messages = input_messages + [message]
|
||||
else:
|
||||
logger.debug(f"completion message (iter: {n_iter}) from the model: {str(message)}")
|
||||
# 1. Start the tool execution step and progress
|
||||
step_id = str(uuid.uuid4())
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseStepStartPayload(
|
||||
step_type=StepType.tool_execution.value,
|
||||
step_id=step_id,
|
||||
)
|
||||
)
|
||||
)
|
||||
tool_call = message.tool_calls[0]
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseStepProgressPayload(
|
||||
step_type=StepType.tool_execution.value,
|
||||
step_id=step_id,
|
||||
tool_call=tool_call,
|
||||
delta=ToolCallDelta(
|
||||
parse_status=ToolCallParseStatus.in_progress,
|
||||
tool_call=tool_call,
|
||||
),
|
||||
)
|
||||
)
|
||||
)
|
||||
input_messages = input_messages + [message]
|
||||
|
||||
# If tool is a client tool, yield CompletionMessage and return
|
||||
if tool_call.tool_name in client_tools:
|
||||
# NOTE: mark end_of_message to indicate to client that it may
|
||||
# call the tool and continue the conversation with the tool's response.
|
||||
message.stop_reason = StopReason.end_of_message
|
||||
# Process tool calls in the message
|
||||
client_tool_calls = []
|
||||
non_client_tool_calls = []
|
||||
|
||||
# Separate client and non-client tool calls
|
||||
for tool_call in message.tool_calls:
|
||||
if tool_call.tool_name in client_tools:
|
||||
client_tool_calls.append(tool_call)
|
||||
else:
|
||||
non_client_tool_calls.append(tool_call)
|
||||
|
||||
# Process non-client tool calls first
|
||||
for tool_call in non_client_tool_calls:
|
||||
step_id = str(uuid.uuid4())
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseStepStartPayload(
|
||||
step_type=StepType.tool_execution.value,
|
||||
step_id=step_id,
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseStepProgressPayload(
|
||||
step_type=StepType.tool_execution.value,
|
||||
step_id=step_id,
|
||||
delta=ToolCallDelta(
|
||||
parse_status=ToolCallParseStatus.in_progress,
|
||||
tool_call=tool_call,
|
||||
),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# Execute the tool call
|
||||
async with tracing.span(
|
||||
"tool_execution",
|
||||
{
|
||||
"tool_name": tool_call.tool_name,
|
||||
"input": message.model_dump_json(),
|
||||
},
|
||||
) as span:
|
||||
tool_execution_start_time = datetime.now(timezone.utc).isoformat()
|
||||
tool_result = await self.execute_tool_call_maybe(
|
||||
session_id,
|
||||
tool_call,
|
||||
)
|
||||
if tool_result.content is None:
|
||||
raise ValueError(
|
||||
f"Tool call result (id: {tool_call.call_id}, name: {tool_call.tool_name}) does not have any content"
|
||||
)
|
||||
result_message = ToolResponseMessage(
|
||||
call_id=tool_call.call_id,
|
||||
content=tool_result.content,
|
||||
)
|
||||
span.set_attribute("output", result_message.model_dump_json())
|
||||
|
||||
# Store tool execution step
|
||||
tool_execution_step = ToolExecutionStep(
|
||||
step_id=step_id,
|
||||
turn_id=turn_id,
|
||||
tool_calls=[tool_call],
|
||||
tool_responses=[
|
||||
ToolResponse(
|
||||
call_id=tool_call.call_id,
|
||||
tool_name=tool_call.tool_name,
|
||||
content=tool_result.content,
|
||||
metadata=tool_result.metadata,
|
||||
)
|
||||
],
|
||||
started_at=tool_execution_start_time,
|
||||
completed_at=datetime.now(timezone.utc).isoformat(),
|
||||
)
|
||||
|
||||
# Yield the step completion event
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseStepCompletePayload(
|
||||
step_type=StepType.tool_execution.value,
|
||||
step_id=step_id,
|
||||
step_details=tool_execution_step,
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# Add the result message to input_messages for the next iteration
|
||||
input_messages.append(result_message)
|
||||
|
||||
# TODO: add tool-input touchpoint and a "start" event for this step also
|
||||
# but that needs a lot more refactoring of Tool code potentially
|
||||
if (type(result_message.content) is str) and (
|
||||
out_attachment := _interpret_content_as_attachment(result_message.content)
|
||||
):
|
||||
# NOTE: when we push this message back to the model, the model may ignore the
|
||||
# attached file path etc. since the model is trained to only provide a user message
|
||||
# with the summary. We keep all generated attachments and then attach them to final message
|
||||
output_attachments.append(out_attachment)
|
||||
|
||||
# If there are client tool calls, yield a message with only those tool calls
|
||||
if client_tool_calls:
|
||||
await self.storage.set_in_progress_tool_call_step(
|
||||
session_id,
|
||||
turn_id,
|
||||
ToolExecutionStep(
|
||||
step_id=step_id,
|
||||
turn_id=turn_id,
|
||||
tool_calls=[tool_call],
|
||||
tool_calls=client_tool_calls,
|
||||
tool_responses=[],
|
||||
started_at=datetime.now(timezone.utc).isoformat(),
|
||||
),
|
||||
)
|
||||
yield message
|
||||
|
||||
# Create a copy of the message with only client tool calls
|
||||
client_message = message.model_copy(deep=True)
|
||||
client_message.tool_calls = client_tool_calls
|
||||
# NOTE: mark end_of_message to indicate to client that it may
|
||||
# call the tool and continue the conversation with the tool's response.
|
||||
client_message.stop_reason = StopReason.end_of_message
|
||||
|
||||
# Yield the message with client tool calls
|
||||
yield client_message
|
||||
return
|
||||
|
||||
# If tool is a builtin server tool, execute it
|
||||
tool_name = tool_call.tool_name
|
||||
if isinstance(tool_name, BuiltinTool):
|
||||
tool_name = tool_name.value
|
||||
async with tracing.span(
|
||||
"tool_execution",
|
||||
{
|
||||
"tool_name": tool_name,
|
||||
"input": message.model_dump_json(),
|
||||
},
|
||||
) as span:
|
||||
tool_execution_start_time = datetime.now(timezone.utc).isoformat()
|
||||
tool_call = message.tool_calls[0]
|
||||
tool_result = await self.execute_tool_call_maybe(
|
||||
session_id,
|
||||
tool_call,
|
||||
)
|
||||
if tool_result.content is None:
|
||||
raise ValueError(
|
||||
f"Tool call result (id: {tool_call.call_id}, name: {tool_call.tool_name}) does not have any content"
|
||||
)
|
||||
result_messages = [
|
||||
ToolResponseMessage(
|
||||
call_id=tool_call.call_id,
|
||||
content=tool_result.content,
|
||||
)
|
||||
]
|
||||
assert len(result_messages) == 1, "Currently not supporting multiple messages"
|
||||
result_message = result_messages[0]
|
||||
span.set_attribute("output", result_message.model_dump_json())
|
||||
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseStepCompletePayload(
|
||||
step_type=StepType.tool_execution.value,
|
||||
step_id=step_id,
|
||||
step_details=ToolExecutionStep(
|
||||
step_id=step_id,
|
||||
turn_id=turn_id,
|
||||
tool_calls=[tool_call],
|
||||
tool_responses=[
|
||||
ToolResponse(
|
||||
call_id=result_message.call_id,
|
||||
tool_name=tool_call.tool_name,
|
||||
content=result_message.content,
|
||||
metadata=tool_result.metadata,
|
||||
)
|
||||
],
|
||||
started_at=tool_execution_start_time,
|
||||
completed_at=datetime.now(timezone.utc).isoformat(),
|
||||
),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# TODO: add tool-input touchpoint and a "start" event for this step also
|
||||
# but that needs a lot more refactoring of Tool code potentially
|
||||
if (type(result_message.content) is str) and (
|
||||
out_attachment := _interpret_content_as_attachment(result_message.content)
|
||||
):
|
||||
# NOTE: when we push this message back to the model, the model may ignore the
|
||||
# attached file path etc. since the model is trained to only provide a user message
|
||||
# with the summary. We keep all generated attachments and then attach them to final message
|
||||
output_attachments.append(out_attachment)
|
||||
|
||||
input_messages = input_messages + [message, result_message]
|
||||
|
||||
async def _initialize_tools(
|
||||
self,
|
||||
toolgroups_for_turn: Optional[List[AgentToolGroup]] = None,
|
||||
|
@ -891,16 +906,14 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
if memory_tool and code_interpreter_tool:
|
||||
# if both memory and code_interpreter are available, we download the URLs
|
||||
# and attach the data to the last message.
|
||||
msg = await attachment_message(self.tempdir, url_items)
|
||||
input_messages.append(msg)
|
||||
await attachment_message(self.tempdir, url_items, input_messages[-1])
|
||||
# Since memory is present, add all the data to the memory bank
|
||||
await self.add_to_session_vector_db(session_id, documents)
|
||||
elif code_interpreter_tool:
|
||||
# if only code_interpreter is available, we download the URLs to a tempdir
|
||||
# and attach the path to them as a message to inference with the
|
||||
# assumption that the model invokes the code_interpreter tool with the path
|
||||
msg = await attachment_message(self.tempdir, url_items)
|
||||
input_messages.append(msg)
|
||||
await attachment_message(self.tempdir, url_items, input_messages[-1])
|
||||
elif memory_tool:
|
||||
# if only memory is available, we load the data from the URLs and content items to the memory bank
|
||||
await self.add_to_session_vector_db(session_id, documents)
|
||||
|
@ -967,8 +980,8 @@ async def load_data_from_urls(urls: List[URL]) -> List[str]:
|
|||
return data
|
||||
|
||||
|
||||
async def attachment_message(tempdir: str, urls: List[URL]) -> ToolResponseMessage:
|
||||
content = []
|
||||
async def attachment_message(tempdir: str, urls: List[URL], message: UserMessage) -> None:
|
||||
contents = []
|
||||
|
||||
for url in urls:
|
||||
uri = url.uri
|
||||
|
@ -988,16 +1001,19 @@ async def attachment_message(tempdir: str, urls: List[URL]) -> ToolResponseMessa
|
|||
else:
|
||||
raise ValueError(f"Unsupported URL {url}")
|
||||
|
||||
content.append(
|
||||
contents.append(
|
||||
TextContentItem(
|
||||
text=f'# User provided a file accessible to you at "{filepath}"\nYou can use code_interpreter to load and inspect it.'
|
||||
)
|
||||
)
|
||||
|
||||
return ToolResponseMessage(
|
||||
call_id="",
|
||||
content=content,
|
||||
)
|
||||
if isinstance(message.content, list):
|
||||
message.content.extend(contents)
|
||||
else:
|
||||
if isinstance(message.content, str):
|
||||
message.content = [TextContentItem(text=message.content)] + contents
|
||||
else:
|
||||
message.content = [message.content] + contents
|
||||
|
||||
|
||||
def _interpret_content_as_attachment(
|
||||
|
|
|
@ -227,13 +227,6 @@ class LlamaGuardShield:
|
|||
if len(messages) >= 2 and (messages[0].role == Role.user.value and messages[1].role == Role.user.value):
|
||||
messages = messages[1:]
|
||||
|
||||
for i in range(1, len(messages)):
|
||||
if messages[i].role == messages[i - 1].role:
|
||||
for i, m in enumerate(messages):
|
||||
print(f"{i}: {m.role}: {m.content}")
|
||||
raise ValueError(
|
||||
f"Messages must alternate between user and assistant. Message {i} has the same role as message {i - 1}"
|
||||
)
|
||||
return messages
|
||||
|
||||
async def run(self, messages: List[Message]) -> RunShieldResponse:
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
|
@ -37,7 +38,7 @@ class CodeInterpreterToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime):
|
|||
async def initialize(self):
|
||||
pass
|
||||
|
||||
async def register_tool(self, tool: Tool):
|
||||
async def register_tool(self, tool: Tool) -> None:
|
||||
pass
|
||||
|
||||
async def unregister_tool(self, tool_id: str) -> None:
|
||||
|
@ -65,7 +66,7 @@ class CodeInterpreterToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime):
|
|||
# Use environment variable to control bwrap usage
|
||||
force_disable_bwrap = os.environ.get("DISABLE_CODE_SANDBOX", "").lower() in ("1", "true", "yes")
|
||||
req = CodeExecutionRequest(scripts=[script], use_bwrap=not force_disable_bwrap)
|
||||
res = self.code_executor.execute(req)
|
||||
res = await asyncio.to_thread(self.code_executor.execute, req)
|
||||
pieces = [res["process_status"]]
|
||||
for out_type in ["stdout", "stderr"]:
|
||||
res_out = res[out_type]
|
||||
|
|
19
llama_stack/providers/inline/vector_io/qdrant/__init__.py
Normal file
19
llama_stack/providers/inline/vector_io/qdrant/__init__.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Dict
|
||||
|
||||
from llama_stack.providers.datatypes import Api, ProviderSpec
|
||||
|
||||
from .config import QdrantVectorIOConfig
|
||||
|
||||
|
||||
async def get_adapter_impl(config: QdrantVectorIOConfig, deps: Dict[Api, ProviderSpec]):
    """Build and initialize the Qdrant vector_io implementation for the inline provider.

    The inline provider reuses the adapter class that lives under the remote
    Qdrant provider package; only the configuration type differs.

    :param config: inline Qdrant configuration.
    :param deps: resolved API dependencies; the entry for ``Api.inference`` is
        handed to the adapter.
    :return: the adapter, after ``initialize()`` has been awaited.
    """
    # Function-scope import: the adapter module is only loaded when this provider is built.
    from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter

    inference_api = deps[Api.inference]
    adapter = QdrantVectorIOAdapter(config, inference_api)
    await adapter.initialize()
    return adapter
|
23
llama_stack/providers/inline/vector_io/qdrant/config.py
Normal file
23
llama_stack/providers/inline/vector_io/qdrant/config.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from typing import Any, Dict
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
@json_schema_type
class QdrantVectorIOConfig(BaseModel):
    """Configuration model for the inline Qdrant vector_io provider."""

    # Storage location string; the sample configuration points it at a "qdrant.db" entry.
    path: str

    @classmethod
    def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]:
        """Return a sample run configuration for a distribution.

        The ``path`` value is a ``${env.QDRANT_PATH:<default>}`` placeholder whose
        default is ``~/.llama/<__distro_dir__>``, followed by ``/qdrant.db``.

        :param __distro_dir__: distribution directory name embedded in the default path.
        :return: mapping with the single key ``"path"``.
        """
        path_pieces = ["${env.QDRANT_PATH:~/.llama/", __distro_dir__, "}/", "qdrant.db"]
        return {"path": "".join(path_pieces)}
|
|
@ -92,6 +92,14 @@ def available_providers() -> List[ProviderSpec]:
|
|||
),
|
||||
api_dependencies=[Api.inference],
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.vector_io,
|
||||
provider_type="inline::qdrant",
|
||||
pip_packages=["qdrant-client"],
|
||||
module="llama_stack.providers.inline.vector_io.qdrant",
|
||||
config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig",
|
||||
api_dependencies=[Api.inference],
|
||||
),
|
||||
remote_provider_spec(
|
||||
Api.vector_io,
|
||||
AdapterSpec(
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
import logging
|
||||
import warnings
|
||||
from functools import lru_cache
|
||||
from typing import AsyncIterator, List, Optional, Union
|
||||
|
||||
from openai import APIConnectionError, AsyncOpenAI, BadRequestError
|
||||
|
@ -82,12 +83,42 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
# )
|
||||
|
||||
self._config = config
|
||||
# make sure the client lives longer than any async calls
|
||||
self._client = AsyncOpenAI(
|
||||
base_url=f"{self._config.url}/v1",
|
||||
api_key=(self._config.api_key.get_secret_value() if self._config.api_key else "NO KEY"),
|
||||
timeout=self._config.timeout,
|
||||
)
|
||||
|
||||
@lru_cache  # noqa: B019
def _get_client(self, provider_model_id: str) -> AsyncOpenAI:
    """
    For hosted models, https://integrate.api.nvidia.com/v1 is the primary base_url. However,
    some models are hosted on different URLs. This function returns the appropriate client
    for the given provider_model_id.

    Two levels of caching are used:

    * ``lru_cache`` on this method avoids repeating the lookup for a model id
      that has already been resolved.
    * a per-instance mapping keyed by base_url guarantees that all models served
      from the same base_url share a single client, so no new client is created
      per request or per model.

    :param provider_model_id: The provider model ID
    :return: An OpenAI client
    """
    special_model_urls = {
        "meta/llama-3.2-11b-vision-instruct": "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-11b-vision-instruct",
        "meta/llama-3.2-90b-vision-instruct": "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct",
    }

    base_url = f"{self._config.url}/v1"
    if _is_nvidia_hosted(self._config) and provider_model_id in special_model_urls:
        base_url = special_model_urls[provider_model_id]

    # A cache decorated onto a nested function would be rebuilt on every call and
    # never hit, so the per-base_url clients are kept on the instance instead.
    # The mapping is created lazily so the constructor does not need to know about it.
    clients_by_base_url = getattr(self, "_clients_by_base_url", None)
    if clients_by_base_url is None:
        clients_by_base_url = {}
        self._clients_by_base_url = clients_by_base_url

    if base_url not in clients_by_base_url:
        clients_by_base_url[base_url] = AsyncOpenAI(
            base_url=base_url,
            api_key=(self._config.api_key.get_secret_value() if self._config.api_key else "NO KEY"),
            timeout=self._config.timeout,
        )

    return clients_by_base_url[base_url]
|
||||
|
||||
async def completion(
|
||||
self,
|
||||
|
@ -105,9 +136,10 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
|
||||
await check_health(self._config) # this raises errors
|
||||
|
||||
provider_model_id = self.get_provider_model_id(model_id)
|
||||
request = convert_completion_request(
|
||||
request=CompletionRequest(
|
||||
model=self.get_provider_model_id(model_id),
|
||||
model=provider_model_id,
|
||||
content=content,
|
||||
sampling_params=sampling_params,
|
||||
response_format=response_format,
|
||||
|
@ -118,7 +150,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
)
|
||||
|
||||
try:
|
||||
response = await self._client.completions.create(**request)
|
||||
response = await self._get_client(provider_model_id).completions.create(**request)
|
||||
except APIConnectionError as e:
|
||||
raise ConnectionError(f"Failed to connect to NVIDIA NIM at {self._config.url}: {e}") from e
|
||||
|
||||
|
@ -206,6 +238,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
|
||||
await check_health(self._config) # this raises errors
|
||||
|
||||
provider_model_id = self.get_provider_model_id(model_id)
|
||||
request = await convert_chat_completion_request(
|
||||
request=ChatCompletionRequest(
|
||||
model=self.get_provider_model_id(model_id),
|
||||
|
@ -221,7 +254,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
)
|
||||
|
||||
try:
|
||||
response = await self._client.chat.completions.create(**request)
|
||||
response = await self._get_client(provider_model_id).chat.completions.create(**request)
|
||||
except APIConnectionError as e:
|
||||
raise ConnectionError(f"Failed to connect to NVIDIA NIM at {self._config.url}: {e}") from e
|
||||
|
||||
|
|
|
@ -25,6 +25,10 @@ class VLLMInferenceAdapterConfig(BaseModel):
|
|||
default="fake",
|
||||
description="The API token",
|
||||
)
|
||||
tls_verify: bool = Field(
|
||||
default=True,
|
||||
description="Whether to verify TLS certificates",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(
|
||||
|
@ -36,4 +40,5 @@ class VLLMInferenceAdapterConfig(BaseModel):
|
|||
"url": url,
|
||||
"max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
|
||||
"api_token": "${env.VLLM_API_TOKEN:fake}",
|
||||
"tls_verify": "${env.VLLM_TLS_VERIFY:true}",
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@ import json
|
|||
import logging
|
||||
from typing import AsyncGenerator, List, Optional, Union
|
||||
|
||||
import httpx
|
||||
from openai import AsyncOpenAI
|
||||
from openai.types.chat.chat_completion_chunk import (
|
||||
ChatCompletionChunk as OpenAIChatCompletionChunk,
|
||||
|
@ -229,7 +230,11 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
|||
|
||||
async def initialize(self) -> None:
|
||||
log.info(f"Initializing VLLM client with base_url={self.config.url}")
|
||||
self.client = AsyncOpenAI(base_url=self.config.url, api_key=self.config.api_token)
|
||||
self.client = AsyncOpenAI(
|
||||
base_url=self.config.url,
|
||||
api_key=self.config.api_token,
|
||||
http_client=None if self.config.tls_verify else httpx.AsyncClient(verify=False),
|
||||
)
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
pass
|
||||
|
|
|
@ -23,7 +23,6 @@ class QdrantVectorIOConfig(BaseModel):
|
|||
prefix: Optional[str] = None
|
||||
timeout: Optional[int] = None
|
||||
host: Optional[str] = None
|
||||
path: Optional[str] = None
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, **kwargs: Any) -> Dict[str, Any]:
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
import logging
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from numpy.typing import NDArray
|
||||
from qdrant_client import AsyncQdrantClient, models
|
||||
|
@ -16,12 +16,13 @@ from llama_stack.apis.inference import InterleavedContent
|
|||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
||||
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
|
||||
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
|
||||
from llama_stack.providers.utils.memory.vector_store import (
|
||||
EmbeddingIndex,
|
||||
VectorDBWithIndex,
|
||||
)
|
||||
|
||||
from .config import QdrantVectorIOConfig
|
||||
from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
CHUNK_ID_KEY = "_chunk_id"
|
||||
|
@ -99,17 +100,19 @@ class QdrantIndex(EmbeddingIndex):
|
|||
|
||||
|
||||
class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
|
||||
def __init__(self, config: QdrantVectorIOConfig, inference_api: Api.inference) -> None:
|
||||
def __init__(
|
||||
self, config: Union[RemoteQdrantVectorIOConfig, InlineQdrantVectorIOConfig], inference_api: Api.inference
|
||||
) -> None:
|
||||
self.config = config
|
||||
self.client = AsyncQdrantClient(**self.config.model_dump(exclude_none=True))
|
||||
self.client: AsyncQdrantClient = None
|
||||
self.cache = {}
|
||||
self.inference_api = inference_api
|
||||
|
||||
async def initialize(self) -> None:
|
||||
pass
|
||||
self.client = AsyncQdrantClient(**self.config.model_dump(exclude_none=True))
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
self.client.close()
|
||||
await self.client.close()
|
||||
|
||||
async def register_vector_db(
|
||||
self,
|
||||
|
@ -123,6 +126,11 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
|
|||
|
||||
self.cache[vector_db.identifier] = index
|
||||
|
||||
async def unregister_vector_db(self, vector_db_id: str) -> None:
    """Delete the index backing ``vector_db_id`` and drop it from the local cache.

    Identifiers that are not present in ``self.cache`` are ignored.

    :param vector_db_id: identifier of the vector DB to unregister.
    """
    if vector_db_id not in self.cache:
        return

    cached_entry = self.cache[vector_db_id]
    await cached_entry.index.delete()
    del self.cache[vector_db_id]
|
||||
|
||||
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> Optional[VectorDBWithIndex]:
|
||||
if vector_db_id in self.cache:
|
||||
return self.cache[vector_db_id]
|
||||
|
|
|
@ -192,7 +192,11 @@ class LiteLLMOpenAIMixin(
|
|||
if request.tools:
|
||||
input_dict["tools"] = [convert_tooldef_to_openai_tool(tool) for tool in request.tools]
|
||||
if request.tool_config.tool_choice:
|
||||
input_dict["tool_choice"] = request.tool_config.tool_choice.value
|
||||
input_dict["tool_choice"] = (
|
||||
request.tool_config.tool_choice.value
|
||||
if isinstance(request.tool_config.tool_choice, ToolChoice)
|
||||
else request.tool_config.tool_choice
|
||||
)
|
||||
|
||||
provider_data = self.get_request_provider_data()
|
||||
key_field = self.provider_data_api_key_field
|
||||
|
|
|
@ -527,26 +527,30 @@ async def convert_message_to_openai_dict_new(
|
|||
async def _convert_message_content(
|
||||
content: InterleavedContent,
|
||||
) -> Union[str, Iterable[OpenAIChatCompletionContentPartParam]]:
|
||||
async def impl():
|
||||
async def impl(
|
||||
content_: InterleavedContent,
|
||||
) -> Union[str, OpenAIChatCompletionContentPartParam, List[OpenAIChatCompletionContentPartParam]]:
|
||||
# Llama Stack and OpenAI spec match for str and text input
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
elif isinstance(content, TextContentItem):
|
||||
if isinstance(content_, str):
|
||||
return content_
|
||||
elif isinstance(content_, TextContentItem):
|
||||
return OpenAIChatCompletionContentPartTextParam(
|
||||
type="text",
|
||||
text=content.text,
|
||||
text=content_.text,
|
||||
)
|
||||
elif isinstance(content, ImageContentItem):
|
||||
elif isinstance(content_, ImageContentItem):
|
||||
return OpenAIChatCompletionContentPartImageParam(
|
||||
type="image_url",
|
||||
image_url=OpenAIImageURL(url=await convert_image_content_to_url(content)),
|
||||
image_url=OpenAIImageURL(url=await convert_image_content_to_url(content_)),
|
||||
)
|
||||
elif isinstance(content, list):
|
||||
return [await _convert_message_content(item) for item in content]
|
||||
elif isinstance(content_, list):
|
||||
return [await impl(item) for item in content_]
|
||||
else:
|
||||
raise ValueError(f"Unsupported content type: {type(content)}")
|
||||
raise ValueError(f"Unsupported content type: {type(content_)}")
|
||||
|
||||
ret = await impl()
|
||||
ret = await impl(content)
|
||||
|
||||
# OpenAI*Message expects a str or list
|
||||
if isinstance(ret, str) or isinstance(ret, list):
|
||||
return ret
|
||||
else:
|
||||
|
@ -566,13 +570,14 @@ async def convert_message_to_openai_dict_new(
|
|||
OpenAIChatCompletionMessageToolCall(
|
||||
id=tool.call_id,
|
||||
function=OpenAIFunction(
|
||||
name=tool.tool_name,
|
||||
name=tool.tool_name if not isinstance(tool.tool_name, BuiltinTool) else tool.tool_name.value,
|
||||
arguments=json.dumps(tool.arguments),
|
||||
),
|
||||
type="function",
|
||||
)
|
||||
for tool in message.tool_calls
|
||||
],
|
||||
]
|
||||
or None,
|
||||
)
|
||||
elif isinstance(message, ToolResponseMessage):
|
||||
out = OpenAIChatCompletionToolMessage(
|
||||
|
@ -858,7 +863,8 @@ async def convert_openai_chat_completion_stream(
|
|||
event_type = ChatCompletionResponseEventType.progress
|
||||
|
||||
stop_reason = None
|
||||
toolcall_buffer = {}
|
||||
tool_call_idx_to_buffer = {}
|
||||
|
||||
async for chunk in stream:
|
||||
choice = chunk.choices[0] # assuming only one choice per chunk
|
||||
|
||||
|
@ -868,7 +874,6 @@ async def convert_openai_chat_completion_stream(
|
|||
|
||||
# if there's a tool call, emit an event for each tool in the list
|
||||
# if tool call and content, emit both separately
|
||||
|
||||
if choice.delta.tool_calls:
|
||||
# the call may have content and a tool call. ChatCompletionResponseEvent
|
||||
# does not support both, so we emit the content first
|
||||
|
@ -889,44 +894,53 @@ async def convert_openai_chat_completion_stream(
|
|||
)
|
||||
|
||||
if not enable_incremental_tool_calls:
|
||||
yield ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
event_type=next(event_type),
|
||||
delta=ToolCallDelta(
|
||||
tool_call=_convert_openai_tool_calls(choice.delta.tool_calls)[0],
|
||||
parse_status=ToolCallParseStatus.succeeded,
|
||||
),
|
||||
logprobs=_convert_openai_logprobs(logprobs),
|
||||
for tool_call in choice.delta.tool_calls:
|
||||
yield ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
event_type=event_type,
|
||||
delta=ToolCallDelta(
|
||||
tool_call=_convert_openai_tool_calls([tool_call])[0],
|
||||
parse_status=ToolCallParseStatus.succeeded,
|
||||
),
|
||||
logprobs=_convert_openai_logprobs(logprobs),
|
||||
)
|
||||
)
|
||||
)
|
||||
else:
|
||||
tool_call = choice.delta.tool_calls[0]
|
||||
if "name" not in toolcall_buffer:
|
||||
toolcall_buffer["call_id"] = tool_call.id
|
||||
toolcall_buffer["name"] = None
|
||||
toolcall_buffer["content"] = ""
|
||||
if "arguments" not in toolcall_buffer:
|
||||
toolcall_buffer["arguments"] = ""
|
||||
for tool_call in choice.delta.tool_calls:
|
||||
idx = tool_call.index if hasattr(tool_call, "index") else 0
|
||||
|
||||
if tool_call.function.name:
|
||||
toolcall_buffer["name"] = tool_call.function.name
|
||||
delta = f"{toolcall_buffer['name']}("
|
||||
if tool_call.function.arguments:
|
||||
toolcall_buffer["arguments"] += tool_call.function.arguments
|
||||
delta = toolcall_buffer["arguments"]
|
||||
if idx not in tool_call_idx_to_buffer:
|
||||
tool_call_idx_to_buffer[idx] = {
|
||||
"call_id": tool_call.id,
|
||||
"name": None,
|
||||
"arguments": "",
|
||||
"content": "",
|
||||
}
|
||||
|
||||
toolcall_buffer["content"] += delta
|
||||
yield ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
event_type=event_type,
|
||||
delta=ToolCallDelta(
|
||||
tool_call=delta,
|
||||
parse_status=ToolCallParseStatus.in_progress,
|
||||
),
|
||||
logprobs=_convert_openai_logprobs(logprobs),
|
||||
)
|
||||
)
|
||||
else:
|
||||
buffer = tool_call_idx_to_buffer[idx]
|
||||
|
||||
if tool_call.function:
|
||||
if tool_call.function.name:
|
||||
buffer["name"] = tool_call.function.name
|
||||
delta = f"{buffer['name']}("
|
||||
buffer["content"] += delta
|
||||
|
||||
if tool_call.function.arguments:
|
||||
delta = tool_call.function.arguments
|
||||
buffer["arguments"] += delta
|
||||
buffer["content"] += delta
|
||||
|
||||
yield ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
event_type=event_type,
|
||||
delta=ToolCallDelta(
|
||||
tool_call=delta,
|
||||
parse_status=ToolCallParseStatus.in_progress,
|
||||
),
|
||||
logprobs=_convert_openai_logprobs(logprobs),
|
||||
)
|
||||
)
|
||||
elif choice.delta.content:
|
||||
yield ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
event_type=event_type,
|
||||
|
@ -935,47 +949,51 @@ async def convert_openai_chat_completion_stream(
|
|||
)
|
||||
)
|
||||
|
||||
if toolcall_buffer:
|
||||
delta = ")"
|
||||
toolcall_buffer["content"] += delta
|
||||
yield ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
event_type=event_type,
|
||||
delta=ToolCallDelta(
|
||||
tool_call=delta,
|
||||
parse_status=ToolCallParseStatus.in_progress,
|
||||
),
|
||||
logprobs=_convert_openai_logprobs(logprobs),
|
||||
)
|
||||
)
|
||||
try:
|
||||
arguments = json.loads(toolcall_buffer["arguments"])
|
||||
tool_call = ToolCall(
|
||||
call_id=toolcall_buffer["call_id"],
|
||||
tool_name=toolcall_buffer["name"],
|
||||
arguments=arguments,
|
||||
)
|
||||
for idx, buffer in tool_call_idx_to_buffer.items():
|
||||
logger.debug(f"toolcall_buffer[{idx}]: {buffer}")
|
||||
if buffer["name"]:
|
||||
delta = ")"
|
||||
buffer["content"] += delta
|
||||
yield ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
event_type=ChatCompletionResponseEventType.progress,
|
||||
event_type=event_type,
|
||||
delta=ToolCallDelta(
|
||||
tool_call=tool_call,
|
||||
parse_status=ToolCallParseStatus.succeeded,
|
||||
tool_call=delta,
|
||||
parse_status=ToolCallParseStatus.in_progress,
|
||||
),
|
||||
stop_reason=stop_reason,
|
||||
logprobs=None,
|
||||
)
|
||||
)
|
||||
except json.JSONDecodeError:
|
||||
yield ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
event_type=ChatCompletionResponseEventType.complete,
|
||||
delta=ToolCallDelta(
|
||||
tool_call=toolcall_buffer["content"],
|
||||
parse_status=ToolCallParseStatus.failed,
|
||||
),
|
||||
stop_reason=stop_reason,
|
||||
|
||||
try:
|
||||
arguments = json.loads(buffer["arguments"])
|
||||
tool_call = ToolCall(
|
||||
call_id=buffer["call_id"],
|
||||
tool_name=buffer["name"],
|
||||
arguments=arguments,
|
||||
)
|
||||
yield ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
event_type=ChatCompletionResponseEventType.progress,
|
||||
delta=ToolCallDelta(
|
||||
tool_call=tool_call,
|
||||
parse_status=ToolCallParseStatus.succeeded,
|
||||
),
|
||||
stop_reason=stop_reason,
|
||||
)
|
||||
)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Failed to parse arguments: {e}")
|
||||
yield ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
event_type=ChatCompletionResponseEventType.progress,
|
||||
delta=ToolCallDelta(
|
||||
tool_call=buffer["content"],
|
||||
parse_status=ToolCallParseStatus.failed,
|
||||
),
|
||||
stop_reason=stop_reason,
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
yield ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
|
|
|
@ -18,12 +18,14 @@ providers:
|
|||
url: ${env.VLLM_URL}
|
||||
max_tokens: ${env.VLLM_MAX_TOKENS:4096}
|
||||
api_token: ${env.VLLM_API_TOKEN:fake}
|
||||
tls_verify: ${env.VLLM_TLS_VERIFY:true}
|
||||
- provider_id: vllm-safety
|
||||
provider_type: remote::vllm
|
||||
config:
|
||||
url: ${env.SAFETY_VLLM_URL}
|
||||
max_tokens: ${env.VLLM_MAX_TOKENS:4096}
|
||||
api_token: ${env.VLLM_API_TOKEN:fake}
|
||||
tls_verify: ${env.VLLM_TLS_VERIFY:true}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
|
|
|
@ -18,6 +18,7 @@ providers:
|
|||
url: ${env.VLLM_URL}
|
||||
max_tokens: ${env.VLLM_MAX_TOKENS:4096}
|
||||
api_token: ${env.VLLM_API_TOKEN:fake}
|
||||
tls_verify: ${env.VLLM_TLS_VERIFY:true}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
|
|
|
@ -56,7 +56,7 @@ dev = [
|
|||
"ruamel.yaml", # needed for openapi generator
|
||||
]
|
||||
# These are the dependencies required for running unit tests.
|
||||
unit = ["sqlite-vec", "openai", "aiosqlite", "pypdf", "chardet"]
|
||||
unit = ["sqlite-vec", "openai", "aiosqlite", "pypdf", "chardet", "qdrant-client"]
|
||||
# These are the core dependencies required for running integration tests. They are shared across all
|
||||
# providers. If a provider requires additional dependencies, please add them to your environment
|
||||
# separately. If you are using "uv" to execute your tests, you can use the "--with" flag to specify extra
|
||||
|
@ -114,7 +114,6 @@ exclude = [
|
|||
"./.git",
|
||||
"./docs/*",
|
||||
"./build",
|
||||
"./scripts",
|
||||
"./venv",
|
||||
"*.pyi",
|
||||
".pre-commit-config.yaml",
|
||||
|
@ -248,6 +247,7 @@ exclude = [
|
|||
"^llama_stack/providers/inline/vector_io/chroma/",
|
||||
"^llama_stack/providers/inline/vector_io/faiss/",
|
||||
"^llama_stack/providers/inline/vector_io/milvus/",
|
||||
"^llama_stack/providers/inline/vector_io/qdrant/",
|
||||
"^llama_stack/providers/inline/vector_io/sqlite_vec/",
|
||||
"^llama_stack/providers/remote/agents/sample/",
|
||||
"^llama_stack/providers/remote/datasetio/huggingface/",
|
||||
|
|
|
@ -11,7 +11,7 @@ import requests
|
|||
|
||||
|
||||
def get_all_releases(token):
|
||||
url = f"https://api.github.com/repos/meta-llama/llama-stack/releases"
|
||||
url = "https://api.github.com/repos/meta-llama/llama-stack/releases"
|
||||
headers = {"Accept": "application/vnd.github.v3+json"}
|
||||
|
||||
if token:
|
||||
|
@ -22,9 +22,7 @@ def get_all_releases(token):
|
|||
if response.status_code == 200:
|
||||
return response.json()
|
||||
else:
|
||||
raise Exception(
|
||||
f"Error fetching releases: {response.status_code}, {response.text}"
|
||||
)
|
||||
raise Exception(f"Error fetching releases: {response.status_code}, {response.text}")
|
||||
|
||||
|
||||
def clean_release_body(body):
|
||||
|
@ -55,7 +53,7 @@ def merge_release_notes(output_file, token=None):
|
|||
releases = get_all_releases(token)
|
||||
|
||||
with open(output_file, "w", encoding="utf-8") as md_file:
|
||||
md_file.write(f"# Changelog\n\n")
|
||||
md_file.write("# Changelog\n\n")
|
||||
|
||||
for release in releases:
|
||||
md_file.write(f"# {release['tag_name']}\n")
|
||||
|
|
|
@ -271,7 +271,7 @@ def test_custom_tool(llama_stack_client_with_mocked_inference, agent_config):
|
|||
client_tool = get_boiling_point
|
||||
agent_config = {
|
||||
**agent_config,
|
||||
"tools": ["builtin::websearch", client_tool],
|
||||
"tools": [client_tool],
|
||||
}
|
||||
|
||||
agent = Agent(llama_stack_client_with_mocked_inference, **agent_config)
|
||||
|
@ -320,42 +320,55 @@ def test_custom_tool_infinite_loop(llama_stack_client_with_mocked_inference, age
|
|||
assert num_tool_calls <= 5
|
||||
|
||||
|
||||
def test_tool_choice(llama_stack_client_with_mocked_inference, agent_config):
|
||||
def run_agent(tool_choice):
|
||||
client_tool = get_boiling_point
|
||||
|
||||
test_agent_config = {
|
||||
**agent_config,
|
||||
"tool_config": {"tool_choice": tool_choice},
|
||||
"tools": [client_tool],
|
||||
}
|
||||
|
||||
agent = Agent(llama_stack_client_with_mocked_inference, **test_agent_config)
|
||||
session_id = agent.create_session(f"test-session-{uuid4()}")
|
||||
|
||||
response = agent.create_turn(
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the boiling point of polyjuice?",
|
||||
},
|
||||
],
|
||||
session_id=session_id,
|
||||
stream=False,
|
||||
)
|
||||
|
||||
return [step for step in response.steps if step.step_type == "tool_execution"]
|
||||
|
||||
tool_execution_steps = run_agent("required")
|
||||
def test_tool_choice_required(llama_stack_client_with_mocked_inference, agent_config):
    """With tool_choice "required", the turn must contain at least one tool-execution step."""
    steps = run_agent_with_tool_choice(llama_stack_client_with_mocked_inference, agent_config, "required")
    assert len(steps) > 0
|
||||
|
||||
tool_execution_steps = run_agent("none")
|
||||
|
||||
def test_tool_choice_none(llama_stack_client_with_mocked_inference, agent_config):
    """With tool_choice "none", the turn must contain no tool-execution steps."""
    steps = run_agent_with_tool_choice(
        llama_stack_client_with_mocked_inference,
        agent_config,
        "none",
    )
    assert len(steps) == 0
|
||||
|
||||
tool_execution_steps = run_agent("get_boiling_point")
|
||||
|
||||
def test_tool_choice_get_boiling_point(llama_stack_client_with_mocked_inference, agent_config):
    """Naming a specific tool as tool_choice must make the first tool call use that tool.

    Marked xfail for any model whose identifier does not contain "llama"
    (compared case-insensitively).
    """
    model_name = agent_config["model"].lower()
    if "llama" not in model_name:
        pytest.xfail("NotImplemented for non-llama models")

    executed_steps = run_agent_with_tool_choice(
        llama_stack_client_with_mocked_inference,
        agent_config,
        "get_boiling_point",
    )
    # Two separate assertions so a failure shows which condition did not hold.
    assert len(executed_steps) >= 1
    assert executed_steps[0].tool_calls[0].tool_name == "get_boiling_point"
|
||||
|
||||
|
||||
def run_agent_with_tool_choice(client, agent_config, tool_choice):
    """Run one non-streaming turn with the given tool_choice and collect tool executions.

    An agent is built from ``agent_config`` with ``get_boiling_point`` as its
    only tool, ``tool_config.tool_choice`` set to ``tool_choice`` and
    ``max_infer_iters`` set to 2. The agent is asked for the boiling point of
    polyjuice, and the ``tool_execution`` steps of the response are returned
    in their original order.
    """
    overrides = {
        "tool_config": {"tool_choice": tool_choice},
        "tools": [get_boiling_point],
        "max_infer_iters": 2,
    }
    agent = Agent(client, **{**agent_config, **overrides})
    session_id = agent.create_session(f"test-session-{uuid4()}")

    user_message = {
        "role": "user",
        "content": "What is the boiling point of polyjuice?",
    }
    turn = agent.create_turn(
        messages=[user_message],
        session_id=session_id,
        stream=False,
    )

    tool_steps = []
    for step in turn.steps:
        if step.step_type == "tool_execution":
            tool_steps.append(step)
    return tool_steps
|
||||
|
||||
|
||||
@pytest.mark.parametrize("rag_tool_name", ["builtin::rag/knowledge_search", "builtin::rag"])
|
||||
def test_rag_agent(llama_stack_client_with_mocked_inference, agent_config, rag_tool_name):
|
||||
urls = ["chat.rst", "llama3.rst", "memory_optimizations.rst", "lora_finetune.rst"]
|
||||
|
@ -571,7 +584,7 @@ def test_rag_and_code_agent(llama_stack_client_with_mocked_inference, agent_conf
|
|||
[(get_boiling_point, False), (get_boiling_point_with_metadata, True)],
|
||||
)
|
||||
def test_create_turn_response(llama_stack_client_with_mocked_inference, agent_config, client_tools):
|
||||
client_tool, expectes_metadata = client_tools
|
||||
client_tool, expects_metadata = client_tools
|
||||
agent_config = {
|
||||
**agent_config,
|
||||
"input_shields": [],
|
||||
|
@ -597,7 +610,7 @@ def test_create_turn_response(llama_stack_client_with_mocked_inference, agent_co
|
|||
assert steps[0].step_type == "inference"
|
||||
assert steps[1].step_type == "tool_execution"
|
||||
assert steps[1].tool_calls[0].tool_name.startswith("get_boiling_point")
|
||||
if expectes_metadata:
|
||||
if expects_metadata:
|
||||
assert steps[1].tool_responses[0].metadata["source"] == "https://www.google.com"
|
||||
assert steps[2].step_type == "inference"
|
||||
|
||||
|
@ -609,3 +622,44 @@ def test_create_turn_response(llama_stack_client_with_mocked_inference, agent_co
|
|||
assert last_step_completed_at < step.started_at
|
||||
assert step.started_at < step.completed_at
|
||||
last_step_completed_at = step.completed_at
|
||||
|
||||
|
||||
def test_multi_tool_calls(llama_stack_client_with_mocked_inference, agent_config):
    """Check that a single turn can execute two calls to the same client tool.

    The agent is asked to call ``get_boiling_point`` twice. The turn is
    expected to produce seven steps, with shield calls around each inference
    and tool execution step, a single tool execution step holding both tool
    calls, and a final answer mentioning both "-100" and "-212".
    """
    # Compare case-insensitively, like the "llama" guard in
    # test_tool_choice_get_boiling_point, so an identifier such as "GPT-4o"
    # is not xfailed by accident.
    if "gpt" not in agent_config["model"].lower():
        pytest.xfail("Only tested on GPT models")

    agent_config = {
        **agent_config,
        "tools": [get_boiling_point],
    }

    agent = Agent(llama_stack_client_with_mocked_inference, **agent_config)
    session_id = agent.create_session(f"test-session-{uuid4()}")

    response = agent.create_turn(
        messages=[
            {
                "role": "user",
                "content": "Call get_boiling_point twice to answer: What is the boiling point of polyjuice in both celsius and fahrenheit?",
            },
        ],
        session_id=session_id,
        stream=False,
    )

    steps = response.steps
    expected_step_types = [
        "shield_call",
        "inference",
        "shield_call",
        "tool_execution",
        "shield_call",
        "inference",
        "shield_call",
    ]
    # A single list comparison checks both the step count and the order, and
    # shows the whole sequence when it fails.
    assert [step.step_type for step in steps] == expected_step_types

    tool_execution_step = steps[3]
    assert len(tool_execution_step.tool_calls) == 2
    for tool_call in tool_execution_step.tool_calls:
        assert tool_call.tool_name.startswith("get_boiling_point")

    output = response.output_message.content.lower()
    assert "-100" in output
    assert "-212" in output
|
||||
|
|
|
@ -187,8 +187,8 @@ def test_chat_completion_doesnt_block_event_loop(caplog):
|
|||
loop.set_debug(True)
|
||||
caplog.set_level(logging.WARNING)
|
||||
|
||||
# Log when event loop is blocked for more than 100ms
|
||||
loop.slow_callback_duration = 0.1
|
||||
# Log when event loop is blocked for more than 200ms
|
||||
loop.slow_callback_duration = 0.2
|
||||
# Sleep for 500ms in our delayed http response
|
||||
sleep_time = 0.5
|
||||
|
||||
|
|
42
tests/unit/providers/vector_io/conftest.py
Normal file
42
tests/unit/providers/vector_io/conftest.py
Normal file
|
@ -0,0 +1,42 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from llama_stack.apis.vector_io import Chunk
|
||||
|
||||
EMBEDDING_DIMENSION = 384
|
||||
|
||||
|
||||
@pytest.fixture
def vector_db_id() -> str:
    """Return a vector DB identifier with a random numeric suffix between 1 and 100."""
    suffix = random.randint(1, 100)
    return f"test-vector-db-{suffix}"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def embedding_dimension() -> int:
    """Expose the module-level EMBEDDING_DIMENSION constant to tests as a fixture."""
    dimension: int = EMBEDDING_DIMENSION
    return dimension
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def sample_chunks():
    """Build 30 chunks: 10 sentences for each of 3 documents, grouped by document.

    Several chunks share one document_id, so inserting them in multiple
    batches for a single document can expose chunk ID conflicts.
    """
    sentences_per_document, document_count = 10, 3
    chunks = []
    for doc_index in range(document_count):
        for sentence_index in range(sentences_per_document):
            chunks.append(
                Chunk(
                    content=f"Sentence {sentence_index} from document {doc_index}",
                    metadata={"document_id": f"document-{doc_index}"},
                )
            )
    return chunks
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def sample_embeddings(sample_chunks):
    """Return one seeded random float32 embedding row per sample chunk.

    NumPy's global random state is seeded with 42 first, so the rows are
    reproducible across runs.
    """
    np.random.seed(42)
    rows = []
    for _ in sample_chunks:
        rows.append(np.random.rand(EMBEDDING_DIMENSION).astype(np.float32))
    return np.array(rows)
|
135
tests/unit/providers/vector_io/test_qdrant.py
Normal file
135
tests/unit/providers/vector_io/test_qdrant.py
Normal file
|
@ -0,0 +1,135 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from llama_stack.apis.inference import EmbeddingsResponse, Inference
|
||||
from llama_stack.apis.vector_io import (
|
||||
QueryChunksResponse,
|
||||
VectorDB,
|
||||
VectorDBStore,
|
||||
)
|
||||
from llama_stack.providers.inline.vector_io.qdrant.config import (
|
||||
QdrantVectorIOConfig as InlineQdrantVectorIOConfig,
|
||||
)
|
||||
from llama_stack.providers.remote.vector_io.qdrant.qdrant import (
|
||||
QdrantVectorIOAdapter,
|
||||
)
|
||||
|
||||
# This module contains unit tests for the QdrantVectorIOAdapter class. It should only contain
|
||||
# tests which are specific to this class. More general (API-level) tests should be placed in
|
||||
# tests/integration/vector_io/
|
||||
#
|
||||
# How to run this test:
|
||||
#
|
||||
# pytest tests/unit/providers/vector_io/test_qdrant.py \
|
||||
# -v -s --tb=short --disable-warnings --asyncio-mode=auto
|
||||
|
||||
|
||||
@pytest.fixture
def qdrant_config(tmp_path) -> InlineQdrantVectorIOConfig:
    """Return an inline Qdrant config whose database file is under the test's tmp_path."""
    db_path = os.path.join(tmp_path, "qdrant.db")
    return InlineQdrantVectorIOConfig(path=db_path)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def loop():
    """Provide a fresh asyncio event loop for the test session and close it afterwards.

    The loop is yielded rather than returned so that it can be closed at
    session teardown instead of being left open.
    """
    event_loop = asyncio.new_event_loop()
    yield event_loop
    # Release the loop's resources; close() does nothing if it is already closed.
    event_loop.close()
|
||||
|
||||
|
||||
@pytest.fixture
def mock_vector_db(vector_db_id) -> MagicMock:
    """Return a VectorDB-spec'd mock with the test identifier and a placeholder embedding model."""
    vector_db = MagicMock(spec=VectorDB)
    vector_db.identifier = vector_db_id
    vector_db.embedding_model = "embedding_model"
    return vector_db
|
||||
|
||||
|
||||
@pytest.fixture
def mock_vector_db_store(mock_vector_db) -> MagicMock:
    """Return a VectorDBStore-spec'd mock whose async get_vector_db yields mock_vector_db."""
    store = MagicMock(spec=VectorDBStore)
    get_vector_db = AsyncMock(return_value=mock_vector_db)
    store.get_vector_db = get_vector_db
    return store
|
||||
|
||||
|
||||
@pytest.fixture
def mock_api_service(sample_embeddings):
    """Return an Inference-spec'd mock whose async embeddings() returns the sample embeddings."""
    canned_response = EmbeddingsResponse(embeddings=sample_embeddings)
    inference_api = MagicMock(spec=Inference)
    inference_api.embeddings = AsyncMock(return_value=canned_response)
    return inference_api
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def qdrant_adapter(qdrant_config, mock_vector_db_store, mock_api_service, loop) -> QdrantVectorIOAdapter:
    """Yield an initialized QdrantVectorIOAdapter wired to the mocked store and inference API.

    The adapter is shut down once the requesting test has finished.
    """
    vector_io = QdrantVectorIOAdapter(config=qdrant_config, inference_api=mock_api_service)
    vector_io.vector_db_store = mock_vector_db_store
    await vector_io.initialize()

    yield vector_io

    # Teardown: runs after the test that requested the fixture.
    await vector_io.shutdown()
|
||||
|
||||
|
||||
__QUERY = "Sample query"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 30)])
async def test_qdrant_adapter_returns_expected_chunks(
    qdrant_adapter: QdrantVectorIOAdapter,
    vector_db_id,
    sample_chunks,
    sample_embeddings,
    max_query_chunks,
    expected_chunks,
) -> None:
    """Insert the sample chunks, query them, and check how many chunks come back.

    Each parameter pair gives the ``max_chunks`` query limit and the number of
    chunks the response is expected to contain.
    """
    assert qdrant_adapter is not None
    await qdrant_adapter.insert_chunks(vector_db_id, sample_chunks)

    cached_index = await qdrant_adapter._get_and_cache_vector_db_index(vector_db_id=vector_db_id)
    assert cached_index is not None

    query_params = {"max_chunks": max_query_chunks}
    result = await qdrant_adapter.query_chunks(
        query=__QUERY,
        vector_db_id=vector_db_id,
        params=query_params,
    )
    assert isinstance(result, QueryChunksResponse)
    assert len(result.chunks) == expected_chunks
|
||||
|
||||
|
||||
# Bypasses the attempt to convert a Mock to JSON
|
||||
def _prepare_for_json(value: Any) -> str:
|
||||
return str(value)
|
||||
|
||||
|
||||
@patch("llama_stack.providers.utils.telemetry.trace_protocol._prepare_for_json", new=_prepare_for_json)
@pytest.mark.asyncio
async def test_qdrant_register_and_unregister_vector_db(
    qdrant_adapter: QdrantVectorIOAdapter,
    mock_vector_db,
    sample_chunks,
) -> None:
    """Follow a collection through register, first insert and unregister.

    Registering alone must not create the collection, the first insert must
    create it, and unregistering must delete it again.
    """
    client = qdrant_adapter.client
    vector_db_id = mock_vector_db.identifier

    # Nothing exists before the test touches the adapter.
    existing = await client.get_collections()
    assert len(existing.collections) == 0

    # Registration is lazy: no collection before or after it.
    assert not (await client.collection_exists(vector_db_id))
    await qdrant_adapter.register_vector_db(mock_vector_db)
    assert not (await client.collection_exists(vector_db_id))

    # The first insert creates the collection.
    await qdrant_adapter.insert_chunks(vector_db_id, sample_chunks)
    assert await client.collection_exists(vector_db_id)

    # Unregistering deletes the collection.
    await qdrant_adapter.unregister_vector_db(vector_db_id)
    assert not (await client.collection_exists(vector_db_id))
    remaining = await client.get_collections()
    assert len(remaining.collections) == 0
|
|
@ -29,8 +29,6 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import (
|
|||
# -v -s --tb=short --disable-warnings --asyncio-mode=auto
|
||||
|
||||
SQLITE_VEC_PROVIDER = "sqlite_vec"
|
||||
EMBEDDING_DIMENSION = 384
|
||||
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
|
@ -50,26 +48,8 @@ def sqlite_connection(loop):
|
|||
|
||||
|
||||
@pytest_asyncio.fixture(scope="session", autouse=True)
|
||||
async def sqlite_vec_index(sqlite_connection):
|
||||
return await SQLiteVecIndex.create(dimension=EMBEDDING_DIMENSION, connection=sqlite_connection, bank_id="test_bank")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def sample_chunks():
|
||||
"""Generates chunks that force multiple batches for a single document to expose ID conflicts."""
|
||||
n, k = 10, 3
|
||||
sample = [
|
||||
Chunk(content=f"Sentence {i} from document {j}", metadata={"document_id": f"document-{j}"})
|
||||
for j in range(k)
|
||||
for i in range(n)
|
||||
]
|
||||
return sample
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def sample_embeddings(sample_chunks):
|
||||
np.random.seed(42)
|
||||
return np.array([np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) for _ in sample_chunks])
|
||||
async def sqlite_vec_index(sqlite_connection, embedding_dimension):
|
||||
return await SQLiteVecIndex.create(dimension=embedding_dimension, connection=sqlite_connection, bank_id="test_bank")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
@ -82,21 +62,21 @@ async def test_add_chunks(sqlite_vec_index, sample_chunks, sample_embeddings):
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_query_chunks(sqlite_vec_index, sample_chunks, sample_embeddings):
|
||||
async def test_query_chunks(sqlite_vec_index, sample_chunks, sample_embeddings, embedding_dimension):
|
||||
await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
|
||||
query_embedding = np.random.rand(EMBEDDING_DIMENSION).astype(np.float32)
|
||||
query_embedding = np.random.rand(embedding_dimension).astype(np.float32)
|
||||
response = await sqlite_vec_index.query(query_embedding, k=2, score_threshold=0.0)
|
||||
assert isinstance(response, QueryChunksResponse)
|
||||
assert len(response.chunks) == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks):
|
||||
async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dimension):
|
||||
"""Test that chunk IDs do not conflict across batches when inserting chunks."""
|
||||
# Reduce batch size to force multiple batches for same document
|
||||
# since there are 10 chunks per document and batch size is 2
|
||||
batch_size = 2
|
||||
sample_embeddings = np.random.rand(len(sample_chunks), EMBEDDING_DIMENSION).astype(np.float32)
|
||||
sample_embeddings = np.random.rand(len(sample_chunks), embedding_dimension).astype(np.float32)
|
||||
|
||||
await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings, batch_size=batch_size)
|
||||
|
||||
|
|
198
uv.lock
generated
198
uv.lock
generated
|
@ -1,5 +1,4 @@
|
|||
version = 1
|
||||
revision = 1
|
||||
requires-python = ">=3.10"
|
||||
resolution-markers = [
|
||||
"(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
|
@ -8,9 +7,12 @@ resolution-markers = [
|
|||
"(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"python_full_version == '3.11.*' and sys_platform == 'darwin'",
|
||||
"(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"python_full_version >= '3.12' and sys_platform == 'darwin'",
|
||||
"(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"(python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"python_full_version >= '3.13' and sys_platform == 'darwin'",
|
||||
"python_full_version == '3.12.*' and sys_platform == 'darwin'",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -793,6 +795,107 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/89/30/2bd0eb03a7dee7727cd2ec643d1e992979e62d5e7443507381cce0455132/googleapis_common_protos-1.67.0-py2.py3-none-any.whl", hash = "sha256:579de760800d13616f51cf8be00c876f00a9f146d3e6510e19d1f4111758b741", size = 164985 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grpcio"
|
||||
version = "1.71.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/1c/95/aa11fc09a85d91fbc7dd405dcb2a1e0256989d67bf89fa65ae24b3ba105a/grpcio-1.71.0.tar.gz", hash = "sha256:2b85f7820475ad3edec209d3d89a7909ada16caab05d3f2e08a7e8ae3200a55c", size = 12549828 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/7c/c5/ef610b3f988cc0cc67b765f72b8e2db06a1db14e65acb5ae7810a6b7042e/grpcio-1.71.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:c200cb6f2393468142eb50ab19613229dcc7829b5ccee8b658a36005f6669fdd", size = 5210643 },
|
||||
{ url = "https://files.pythonhosted.org/packages/bf/de/c84293c961622df302c0d5d07ec6e2d4cd3874ea42f602be2df09c4ad44f/grpcio-1.71.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b2266862c5ad664a380fbbcdbdb8289d71464c42a8c29053820ee78ba0119e5d", size = 11308962 },
|
||||
{ url = "https://files.pythonhosted.org/packages/7c/38/04c9e0dc8c904570c80faa1f1349b190b63e45d6b2782ec8567b050efa9d/grpcio-1.71.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:0ab8b2864396663a5b0b0d6d79495657ae85fa37dcb6498a2669d067c65c11ea", size = 5699236 },
|
||||
{ url = "https://files.pythonhosted.org/packages/95/96/e7be331d1298fa605ea7c9ceafc931490edd3d5b33c4f695f1a0667f3491/grpcio-1.71.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c30f393f9d5ff00a71bb56de4aa75b8fe91b161aeb61d39528db6b768d7eac69", size = 6339767 },
|
||||
{ url = "https://files.pythonhosted.org/packages/5d/b7/7e7b7bb6bb18baf156fd4f2f5b254150dcdd6cbf0def1ee427a2fb2bfc4d/grpcio-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f250ff44843d9a0615e350c77f890082102a0318d66a99540f54769c8766ab73", size = 5943028 },
|
||||
{ url = "https://files.pythonhosted.org/packages/13/aa/5fb756175995aeb47238d706530772d9a7ac8e73bcca1b47dc145d02c95f/grpcio-1.71.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6d8de076528f7c43a2f576bc311799f89d795aa6c9b637377cc2b1616473804", size = 6031841 },
|
||||
{ url = "https://files.pythonhosted.org/packages/54/93/172783e01eed61f7f180617b7fa4470f504e383e32af2587f664576a7101/grpcio-1.71.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b91879d6da1605811ebc60d21ab6a7e4bae6c35f6b63a061d61eb818c8168f6", size = 6651039 },
|
||||
{ url = "https://files.pythonhosted.org/packages/6f/99/62654b220a27ed46d3313252214f4bc66261143dc9b58004085cd0646753/grpcio-1.71.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f71574afdf944e6652203cd1badcda195b2a27d9c83e6d88dc1ce3cfb73b31a5", size = 6198465 },
|
||||
{ url = "https://files.pythonhosted.org/packages/68/35/96116de833b330abe4412cc94edc68f99ed2fa3e39d8713ff307b3799e81/grpcio-1.71.0-cp310-cp310-win32.whl", hash = "sha256:8997d6785e93308f277884ee6899ba63baafa0dfb4729748200fcc537858a509", size = 3620382 },
|
||||
{ url = "https://files.pythonhosted.org/packages/b7/09/f32ef637e386f3f2c02effac49699229fa560ce9007682d24e9e212d2eb4/grpcio-1.71.0-cp310-cp310-win_amd64.whl", hash = "sha256:7d6ac9481d9d0d129224f6d5934d5832c4b1cddb96b59e7eba8416868909786a", size = 4280302 },
|
||||
{ url = "https://files.pythonhosted.org/packages/63/04/a085f3ad4133426f6da8c1becf0749872a49feb625a407a2e864ded3fb12/grpcio-1.71.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:d6aa986318c36508dc1d5001a3ff169a15b99b9f96ef5e98e13522c506b37eef", size = 5210453 },
|
||||
{ url = "https://files.pythonhosted.org/packages/b4/d5/0bc53ed33ba458de95020970e2c22aa8027b26cc84f98bea7fcad5d695d1/grpcio-1.71.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:d2c170247315f2d7e5798a22358e982ad6eeb68fa20cf7a820bb74c11f0736e7", size = 11347567 },
|
||||
{ url = "https://files.pythonhosted.org/packages/e3/6d/ce334f7e7a58572335ccd61154d808fe681a4c5e951f8a1ff68f5a6e47ce/grpcio-1.71.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:e6f83a583ed0a5b08c5bc7a3fe860bb3c2eac1f03f1f63e0bc2091325605d2b7", size = 5696067 },
|
||||
{ url = "https://files.pythonhosted.org/packages/05/4a/80befd0b8b1dc2b9ac5337e57473354d81be938f87132e147c4a24a581bd/grpcio-1.71.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4be74ddeeb92cc87190e0e376dbc8fc7736dbb6d3d454f2fa1f5be1dee26b9d7", size = 6348377 },
|
||||
{ url = "https://files.pythonhosted.org/packages/c7/67/cbd63c485051eb78663355d9efd1b896cfb50d4a220581ec2cb9a15cd750/grpcio-1.71.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd0dfbe4d5eb1fcfec9490ca13f82b089a309dc3678e2edabc144051270a66e", size = 5940407 },
|
||||
{ url = "https://files.pythonhosted.org/packages/98/4b/7a11aa4326d7faa499f764eaf8a9b5a0eb054ce0988ee7ca34897c2b02ae/grpcio-1.71.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a2242d6950dc892afdf9e951ed7ff89473aaf744b7d5727ad56bdaace363722b", size = 6030915 },
|
||||
{ url = "https://files.pythonhosted.org/packages/eb/a2/cdae2d0e458b475213a011078b0090f7a1d87f9a68c678b76f6af7c6ac8c/grpcio-1.71.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0fa05ee31a20456b13ae49ad2e5d585265f71dd19fbd9ef983c28f926d45d0a7", size = 6648324 },
|
||||
{ url = "https://files.pythonhosted.org/packages/27/df/f345c8daaa8d8574ce9869f9b36ca220c8845923eb3087e8f317eabfc2a8/grpcio-1.71.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3d081e859fb1ebe176de33fc3adb26c7d46b8812f906042705346b314bde32c3", size = 6197839 },
|
||||
{ url = "https://files.pythonhosted.org/packages/f2/2c/cd488dc52a1d0ae1bad88b0d203bc302efbb88b82691039a6d85241c5781/grpcio-1.71.0-cp311-cp311-win32.whl", hash = "sha256:d6de81c9c00c8a23047136b11794b3584cdc1460ed7cbc10eada50614baa1444", size = 3619978 },
|
||||
{ url = "https://files.pythonhosted.org/packages/ee/3f/cf92e7e62ccb8dbdf977499547dfc27133124d6467d3a7d23775bcecb0f9/grpcio-1.71.0-cp311-cp311-win_amd64.whl", hash = "sha256:24e867651fc67717b6f896d5f0cac0ec863a8b5fb7d6441c2ab428f52c651c6b", size = 4282279 },
|
||||
{ url = "https://files.pythonhosted.org/packages/4c/83/bd4b6a9ba07825bd19c711d8b25874cd5de72c2a3fbf635c3c344ae65bd2/grpcio-1.71.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:0ff35c8d807c1c7531d3002be03221ff9ae15712b53ab46e2a0b4bb271f38537", size = 5184101 },
|
||||
{ url = "https://files.pythonhosted.org/packages/31/ea/2e0d90c0853568bf714693447f5c73272ea95ee8dad107807fde740e595d/grpcio-1.71.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:b78a99cd1ece4be92ab7c07765a0b038194ded2e0a26fd654591ee136088d8d7", size = 11310927 },
|
||||
{ url = "https://files.pythonhosted.org/packages/ac/bc/07a3fd8af80467390af491d7dc66882db43884128cdb3cc8524915e0023c/grpcio-1.71.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:dc1a1231ed23caac1de9f943d031f1bc38d0f69d2a3b243ea0d664fc1fbd7fec", size = 5654280 },
|
||||
{ url = "https://files.pythonhosted.org/packages/16/af/21f22ea3eed3d0538b6ef7889fce1878a8ba4164497f9e07385733391e2b/grpcio-1.71.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6beeea5566092c5e3c4896c6d1d307fb46b1d4bdf3e70c8340b190a69198594", size = 6312051 },
|
||||
{ url = "https://files.pythonhosted.org/packages/49/9d/e12ddc726dc8bd1aa6cba67c85ce42a12ba5b9dd75d5042214a59ccf28ce/grpcio-1.71.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5170929109450a2c031cfe87d6716f2fae39695ad5335d9106ae88cc32dc84c", size = 5910666 },
|
||||
{ url = "https://files.pythonhosted.org/packages/d9/e9/38713d6d67aedef738b815763c25f092e0454dc58e77b1d2a51c9d5b3325/grpcio-1.71.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5b08d03ace7aca7b2fadd4baf291139b4a5f058805a8327bfe9aece7253b6d67", size = 6012019 },
|
||||
{ url = "https://files.pythonhosted.org/packages/80/da/4813cd7adbae6467724fa46c952d7aeac5e82e550b1c62ed2aeb78d444ae/grpcio-1.71.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f903017db76bf9cc2b2d8bdd37bf04b505bbccad6be8a81e1542206875d0e9db", size = 6637043 },
|
||||
{ url = "https://files.pythonhosted.org/packages/52/ca/c0d767082e39dccb7985c73ab4cf1d23ce8613387149e9978c70c3bf3b07/grpcio-1.71.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:469f42a0b410883185eab4689060a20488a1a0a00f8bbb3cbc1061197b4c5a79", size = 6186143 },
|
||||
{ url = "https://files.pythonhosted.org/packages/00/61/7b2c8ec13303f8fe36832c13d91ad4d4ba57204b1c723ada709c346b2271/grpcio-1.71.0-cp312-cp312-win32.whl", hash = "sha256:ad9f30838550695b5eb302add33f21f7301b882937460dd24f24b3cc5a95067a", size = 3604083 },
|
||||
{ url = "https://files.pythonhosted.org/packages/fd/7c/1e429c5fb26122055d10ff9a1d754790fb067d83c633ff69eddcf8e3614b/grpcio-1.71.0-cp312-cp312-win_amd64.whl", hash = "sha256:652350609332de6dac4ece254e5d7e1ff834e203d6afb769601f286886f6f3a8", size = 4272191 },
|
||||
{ url = "https://files.pythonhosted.org/packages/04/dd/b00cbb45400d06b26126dcfdbdb34bb6c4f28c3ebbd7aea8228679103ef6/grpcio-1.71.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:cebc1b34ba40a312ab480ccdb396ff3c529377a2fce72c45a741f7215bfe8379", size = 5184138 },
|
||||
{ url = "https://files.pythonhosted.org/packages/ed/0a/4651215983d590ef53aac40ba0e29dda941a02b097892c44fa3357e706e5/grpcio-1.71.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:85da336e3649a3d2171e82f696b5cad2c6231fdd5bad52616476235681bee5b3", size = 11310747 },
|
||||
{ url = "https://files.pythonhosted.org/packages/57/a3/149615b247f321e13f60aa512d3509d4215173bdb982c9098d78484de216/grpcio-1.71.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f9a412f55bb6e8f3bb000e020dbc1e709627dcb3a56f6431fa7076b4c1aab0db", size = 5653991 },
|
||||
{ url = "https://files.pythonhosted.org/packages/ca/56/29432a3e8d951b5e4e520a40cd93bebaa824a14033ea8e65b0ece1da6167/grpcio-1.71.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47be9584729534660416f6d2a3108aaeac1122f6b5bdbf9fd823e11fe6fbaa29", size = 6312781 },
|
||||
{ url = "https://files.pythonhosted.org/packages/a3/f8/286e81a62964ceb6ac10b10925261d4871a762d2a763fbf354115f9afc98/grpcio-1.71.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9c80ac6091c916db81131d50926a93ab162a7e97e4428ffc186b6e80d6dda4", size = 5910479 },
|
||||
{ url = "https://files.pythonhosted.org/packages/35/67/d1febb49ec0f599b9e6d4d0d44c2d4afdbed9c3e80deb7587ec788fcf252/grpcio-1.71.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:789d5e2a3a15419374b7b45cd680b1e83bbc1e52b9086e49308e2c0b5bbae6e3", size = 6013262 },
|
||||
{ url = "https://files.pythonhosted.org/packages/a1/04/f9ceda11755f0104a075ad7163fc0d96e2e3a9fe25ef38adfc74c5790daf/grpcio-1.71.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:1be857615e26a86d7363e8a163fade914595c81fec962b3d514a4b1e8760467b", size = 6643356 },
|
||||
{ url = "https://files.pythonhosted.org/packages/fb/ce/236dbc3dc77cf9a9242adcf1f62538734ad64727fabf39e1346ad4bd5c75/grpcio-1.71.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a76d39b5fafd79ed604c4be0a869ec3581a172a707e2a8d7a4858cb05a5a7637", size = 6186564 },
|
||||
{ url = "https://files.pythonhosted.org/packages/10/fd/b3348fce9dd4280e221f513dd54024e765b21c348bc475516672da4218e9/grpcio-1.71.0-cp313-cp313-win32.whl", hash = "sha256:74258dce215cb1995083daa17b379a1a5a87d275387b7ffe137f1d5131e2cfbb", size = 3601890 },
|
||||
{ url = "https://files.pythonhosted.org/packages/be/f8/db5d5f3fc7e296166286c2a397836b8b042f7ad1e11028d82b061701f0f7/grpcio-1.71.0-cp313-cp313-win_amd64.whl", hash = "sha256:22c3bc8d488c039a199f7a003a38cb7635db6656fa96437a8accde8322ce2366", size = 4273308 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grpcio-tools"
|
||||
version = "1.71.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "grpcio" },
|
||||
{ name = "protobuf" },
|
||||
{ name = "setuptools" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/05/d2/c0866a48c355a6a4daa1f7e27e210c7fa561b1f3b7c0bce2671e89cfa31e/grpcio_tools-1.71.0.tar.gz", hash = "sha256:38dba8e0d5e0fb23a034e09644fdc6ed862be2371887eee54901999e8f6792a8", size = 5326008 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f9/60/aa7f261eda558d018457e5c8bd8a8079136e5107a0942fd3167477ab50e2/grpcio_tools-1.71.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:f4ad7f0d756546902597053d70b3af2606fbd70d7972876cd75c1e241d22ae00", size = 2385558 },
|
||||
{ url = "https://files.pythonhosted.org/packages/0d/e3/e47b96e93e51398ba3462e027d93a10c0c23fffc31733de9bd4f44a2b867/grpcio_tools-1.71.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:64bdb291df61cf570b5256777ad5fe2b1db6d67bc46e55dc56a0a862722ae329", size = 5930039 },
|
||||
{ url = "https://files.pythonhosted.org/packages/a6/69/5d8920002483b2a65ae3b03329dfe3b668c3592f001d5358e1538f540012/grpcio_tools-1.71.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:8dd9795e982d77a4b496f7278b943c2563d9afde2069cdee78c111a40cc4d675", size = 2351932 },
|
||||
{ url = "https://files.pythonhosted.org/packages/c4/50/8116e307662a2337cdc3f0e1a8b23af197129448b7ff7e0cf1a76c9b0178/grpcio_tools-1.71.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c1b5860c41a36b26fec4f52998f1a451d0525a5c9a4fb06b6ea3e9211abdb925", size = 2744962 },
|
||||
{ url = "https://files.pythonhosted.org/packages/e3/4b/d95be4aaf78d7b02dff3bd332c75c228288178e92af0e5228759ac5002a0/grpcio_tools-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3059c14035e5dc03d462f261e5900b9a077fd1a36976c3865b8507474520bad4", size = 2476716 },
|
||||
{ url = "https://files.pythonhosted.org/packages/37/c2/c784a3705b1a1fd277751a8fc881d5a29325a460b9211e3c6164f594b178/grpcio_tools-1.71.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f360981b215b1d5aff9235b37e7e1826246e35bbac32a53e41d4e990a37b8f4c", size = 2854132 },
|
||||
{ url = "https://files.pythonhosted.org/packages/93/8f/173adbf72ed3996e1962182b55abf30151edc8b53daac0bf15cc3dc4b09e/grpcio_tools-1.71.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bfe3888c3bbe16a5aa39409bc38744a31c0c3d2daa2b0095978c56e106c85b42", size = 3305069 },
|
||||
{ url = "https://files.pythonhosted.org/packages/e4/a8/b1e7df63e7f83336275922f92ded1cd6918964c511280b31c872c54538f4/grpcio_tools-1.71.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:145985c0bf12131f0a1503e65763e0f060473f7f3928ed1ff3fb0e8aad5bc8ac", size = 2916636 },
|
||||
{ url = "https://files.pythonhosted.org/packages/be/a3/53f1e74c6e1c92ad94d7a0127a60fe913276a3e8c864737a053a1574b05c/grpcio_tools-1.71.0-cp310-cp310-win32.whl", hash = "sha256:82c430edd939bb863550ee0fecf067d78feff828908a1b529bbe33cc57f2419c", size = 949576 },
|
||||
{ url = "https://files.pythonhosted.org/packages/97/43/4a3ae830c1405bcb1ba47f2225779dbe9fc009ba341d4a90012919304855/grpcio_tools-1.71.0-cp310-cp310-win_amd64.whl", hash = "sha256:83e90724e3f02415c628e4ead1d6ffe063820aaaa078d9a39176793df958cd5a", size = 1121087 },
|
||||
{ url = "https://files.pythonhosted.org/packages/5d/ec/73b9797ffec80e1faf039ce3e2f0513e26e1a68eedc525ed294ae2a44d03/grpcio_tools-1.71.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:1f19b16b49afa5d21473f49c0966dd430c88d089cd52ac02404d8cef67134efb", size = 2385557 },
|
||||
{ url = "https://files.pythonhosted.org/packages/bf/87/42c6e192b7b09c9610a53e771797f7826aee4f6e769683985ae406a2d862/grpcio_tools-1.71.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:459c8f5e00e390aecd5b89de67deb3ec7188a274bc6cb50e43cef35ab3a3f45d", size = 5954404 },
|
||||
{ url = "https://files.pythonhosted.org/packages/25/30/3fd385a56d32dce34cde09a64dbaf7cf85d395f2bcd86dd41e4b4ee5938f/grpcio_tools-1.71.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:edab7e6518de01196be37f96cb1e138c3819986bf5e2a6c9e1519b4d716b2f5a", size = 2352061 },
|
||||
{ url = "https://files.pythonhosted.org/packages/87/eb/e9971c7693a2d85e7f55760f7906211a95ff74af4d41b05d187849d7fb58/grpcio_tools-1.71.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8b93b9f6adc7491d4c10144c0643409db298e5e63c997106a804f6f0248dbaf4", size = 2745033 },
|
||||
{ url = "https://files.pythonhosted.org/packages/15/72/4e69beae87a1b334f80da9e93c8e2f5c8fe4860c956a781246a092dc4c97/grpcio_tools-1.71.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ae5f2efa9e644c10bf1021600bfc099dfbd8e02b184d2d25dc31fcd6c2bc59e", size = 2476743 },
|
||||
{ url = "https://files.pythonhosted.org/packages/b5/f3/336d2c83f1bfc00a5376bf20dd2273d7aa891b03dd91b11c71ca47392351/grpcio_tools-1.71.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:65aa082f4435571d65d5ce07fc444f23c3eff4f3e34abef599ef8c9e1f6f360f", size = 2853693 },
|
||||
{ url = "https://files.pythonhosted.org/packages/62/ba/cc7ace518c11501a4b8620df5edb8188e81470e5b82dc6829212f3e9b2ff/grpcio_tools-1.71.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:1331e726e08b7bdcbf2075fcf4b47dff07842b04845e6e220a08a4663e232d7f", size = 3304474 },
|
||||
{ url = "https://files.pythonhosted.org/packages/00/0d/4b843654af3d5aa2f1a5775df1d583e6e3471e6d569106fd3213ad185a98/grpcio_tools-1.71.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6693a7d3ba138b0e693b3d1f687cdd9db9e68976c3fa2b951c17a072fea8b583", size = 2916147 },
|
||||
{ url = "https://files.pythonhosted.org/packages/e4/14/047e1c817422bc3d434247b9c640c51fd51ca4e047583ff31d927c3dea73/grpcio_tools-1.71.0-cp311-cp311-win32.whl", hash = "sha256:6d11ed3ff7b6023b5c72a8654975324bb98c1092426ba5b481af406ff559df00", size = 949374 },
|
||||
{ url = "https://files.pythonhosted.org/packages/86/cb/739a1b6d517672693796022c0f9061f63eaa243ec70cbbfa59bf881ed9fb/grpcio_tools-1.71.0-cp311-cp311-win_amd64.whl", hash = "sha256:072b2a5805ac97e4623b3aa8f7818275f3fb087f4aa131b0fce00471065f6eaa", size = 1120786 },
|
||||
{ url = "https://files.pythonhosted.org/packages/de/e4/156956b92ad0298290c3d68e6670bc5a6fbefcccfe1ec3997480605e7135/grpcio_tools-1.71.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:61c0409d5bdac57a7bd0ce0ab01c1c916728fe4c8a03d77a25135ad481eb505c", size = 2385480 },
|
||||
{ url = "https://files.pythonhosted.org/packages/c1/08/9930eb4bb38c5214041c9f24f8b35e9864a7938282db986836546c782d52/grpcio_tools-1.71.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:28784f39921d061d2164a9dcda5164a69d07bf29f91f0ea50b505958292312c9", size = 5951891 },
|
||||
{ url = "https://files.pythonhosted.org/packages/73/65/931f29ec9c33719d48e1e30446ecce6f5d2cd4e4934fa73fbe07de41c43b/grpcio_tools-1.71.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:192808cf553cedca73f0479cc61d5684ad61f24db7a5f3c4dfe1500342425866", size = 2351967 },
|
||||
{ url = "https://files.pythonhosted.org/packages/b8/26/2ec8748534406214f20a4809c36efcfa88d1a26246e8312102e3ef8c295d/grpcio_tools-1.71.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:989ee9da61098230d3d4c8f8f8e27c2de796f1ff21b1c90110e636d9acd9432b", size = 2745003 },
|
||||
{ url = "https://files.pythonhosted.org/packages/f1/33/87b4610c86a4e10ee446b543a4d536f94ab04f828bab841f0bc1a083de72/grpcio_tools-1.71.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:541a756276c8a55dec991f6c0106ae20c8c8f5ce8d0bdbfcb01e2338d1a8192b", size = 2476455 },
|
||||
{ url = "https://files.pythonhosted.org/packages/00/7c/f7f0cc36a43be9d45b3ce2a55245f3c7d063a24b7930dd719929e58871a4/grpcio_tools-1.71.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:870c0097700d13c403e5517cb7750ab5b4a791ce3e71791c411a38c5468b64bd", size = 2854333 },
|
||||
{ url = "https://files.pythonhosted.org/packages/07/c4/34b9ea62b173c13fa7accba5f219355b320c05c80c79c3ba70fe52f47b2f/grpcio_tools-1.71.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:abd57f615e88bf93c3c6fd31f923106e3beb12f8cd2df95b0d256fa07a7a0a57", size = 3304297 },
|
||||
{ url = "https://files.pythonhosted.org/packages/5c/ef/9d3449db8a07688dc3de7dcbd2a07048a128610b1a491c5c0cb3e90a00c5/grpcio_tools-1.71.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:753270e2d06d37e6d7af8967d1d059ec635ad215882041a36294f4e2fd502b2e", size = 2916212 },
|
||||
{ url = "https://files.pythonhosted.org/packages/2e/c6/990e8194c934dfe7cf89ef307c319fa4f2bc0b78aeca707addbfa1e502f1/grpcio_tools-1.71.0-cp312-cp312-win32.whl", hash = "sha256:0e647794bd7138b8c215e86277a9711a95cf6a03ff6f9e555d54fdf7378b9f9d", size = 948849 },
|
||||
{ url = "https://files.pythonhosted.org/packages/42/95/3c36d3205e6bd19853cc2420e44b6ef302eb4cfcf56498973c7e85f6c03b/grpcio_tools-1.71.0-cp312-cp312-win_amd64.whl", hash = "sha256:48debc879570972d28bfe98e4970eff25bb26da3f383e0e49829b2d2cd35ad87", size = 1120294 },
|
||||
{ url = "https://files.pythonhosted.org/packages/84/a7/70dc7e9957bcbaccd4dcb6cc11215e0b918f546d55599221522fe0d073e0/grpcio_tools-1.71.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:9a78d07d6c301a25ef5ede962920a522556a1dfee1ccc05795994ceb867f766c", size = 2384758 },
|
||||
{ url = "https://files.pythonhosted.org/packages/65/79/57320b28d0a0c5ec94095fd571a65292f8ed7e1c47e59ae4021e8a48d49b/grpcio_tools-1.71.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:580ac88141c9815557e63c9c04f5b1cdb19b4db8d0cb792b573354bde1ee8b12", size = 5951661 },
|
||||
{ url = "https://files.pythonhosted.org/packages/80/3d/343df5ed7c5dd66fc7a19e4ef3e97ccc4f5d802122b04cd6492f0dcd79f5/grpcio_tools-1.71.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f7c678e68ece0ae908ecae1c4314a0c2c7f83e26e281738b9609860cc2c82d96", size = 2351571 },
|
||||
{ url = "https://files.pythonhosted.org/packages/56/2f/b9736e8c84e880c4237f5b880c6c799b4977c5cde190999bc7ab4b2ec445/grpcio_tools-1.71.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:56ecd6cc89b5e5eed1de5eb9cafce86c9c9043ee3840888cc464d16200290b53", size = 2744580 },
|
||||
{ url = "https://files.pythonhosted.org/packages/76/9b/bdb384967353da7bf64bac4232f4cf8ae43f19d0f2f640978d4d4197e667/grpcio_tools-1.71.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e52a041afc20ab2431d756b6295d727bd7adee813b21b06a3483f4a7a15ea15f", size = 2475978 },
|
||||
{ url = "https://files.pythonhosted.org/packages/26/71/1411487fd7862d347b98fda5e3beef611a71b2ac2faac62a965d9e2536b3/grpcio_tools-1.71.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:2a1712f12102b60c8d92779b89d0504e0d6f3a59f2b933e5622b8583f5c02992", size = 2853314 },
|
||||
{ url = "https://files.pythonhosted.org/packages/03/06/59d0523eb1ba2f64edc72cb150152fa1b2e77061cae3ef3ecd3ef2a87f51/grpcio_tools-1.71.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:41878cb7a75477e62fdd45e7e9155b3af1b7a5332844021e2511deaf99ac9e6c", size = 3303981 },
|
||||
{ url = "https://files.pythonhosted.org/packages/c2/71/fb9fb49f2b738ec1dfbbc8cdce0b26e5f9c5fc0edef72e453580620d6a36/grpcio_tools-1.71.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:682e958b476049ccc14c71bedf3f979bced01f6e0c04852efc5887841a32ad6b", size = 2915876 },
|
||||
{ url = "https://files.pythonhosted.org/packages/bd/0f/0d49f6fe6fa2d09e9820dd9eeb30437e86002303076be2b6ada0fb52b8f2/grpcio_tools-1.71.0-cp313-cp313-win32.whl", hash = "sha256:0ccfb837152b7b858b9f26bb110b3ae8c46675d56130f6c2f03605c4f129be13", size = 948245 },
|
||||
{ url = "https://files.pythonhosted.org/packages/bb/14/ab131a39187bfea950280b2277a82d2033469fe8c86f73b10b19f53cc5ca/grpcio_tools-1.71.0-cp313-cp313-win_amd64.whl", hash = "sha256:ffff9bc5eacb34dd26b487194f7d44a3e64e752fc2cf049d798021bf25053b87", size = 1119649 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h11"
|
||||
version = "0.14.0"
|
||||
|
@ -802,6 +905,28 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "4.2.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "hpack" },
|
||||
{ name = "hyperframe" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/1b/38/d7f80fd13e6582fb8e0df8c9a653dcc02b03ca34f4d72f34869298c5baf8/h2-4.2.0.tar.gz", hash = "sha256:c8a52129695e88b1a0578d8d2cc6842bbd79128ac685463b887ee278126ad01f", size = 2150682 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d0/9e/984486f2d0a0bd2b024bf4bc1c62688fcafa9e61991f041fb0e2def4a982/h2-4.2.0-py3-none-any.whl", hash = "sha256:479a53ad425bb29af087f3458a61d30780bc818e4ebcf01f0b536ba916462ed0", size = 60957 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hpack"
|
||||
version = "4.1.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httpcore"
|
||||
version = "1.0.7"
|
||||
|
@ -830,6 +955,11 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
http2 = [
|
||||
{ name = "h2" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httpx-sse"
|
||||
version = "0.4.0"
|
||||
|
@ -857,6 +987,15 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/2a/4d/8092df2cb0cafa9fcaf691db851b2fccfe9cad4048e081436bbbdf56e4e1/huggingface_hub-0.29.0-py3-none-any.whl", hash = "sha256:c02daa0b6bafbdacb1320fdfd1dc7151d0940825c88c4ef89837fdb1f6ea0afe", size = 468012 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyperframe"
|
||||
version = "6.1.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "identify"
|
||||
version = "2.6.7"
|
||||
|
@ -1250,6 +1389,7 @@ unit = [
|
|||
{ name = "chardet" },
|
||||
{ name = "openai" },
|
||||
{ name = "pypdf" },
|
||||
{ name = "qdrant-client" },
|
||||
{ name = "sqlite-vec" },
|
||||
]
|
||||
|
||||
|
@ -1290,6 +1430,7 @@ requires-dist = [
|
|||
{ name = "pytest-cov", marker = "extra == 'dev'" },
|
||||
{ name = "pytest-html", marker = "extra == 'dev'" },
|
||||
{ name = "python-dotenv" },
|
||||
{ name = "qdrant-client", marker = "extra == 'unit'" },
|
||||
{ name = "requests" },
|
||||
{ name = "rich" },
|
||||
{ name = "rich", marker = "extra == 'codegen'" },
|
||||
|
@ -1314,7 +1455,6 @@ requires-dist = [
|
|||
{ name = "types-setuptools", marker = "extra == 'dev'" },
|
||||
{ name = "uvicorn", marker = "extra == 'dev'" },
|
||||
]
|
||||
provides-extras = ["dev", "unit", "test", "docs", "codegen"]
|
||||
|
||||
[[package]]
|
||||
name = "llama-stack-client"
|
||||
|
@ -2062,6 +2202,18 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "portalocker"
|
||||
version = "2.10.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pywin32", marker = "sys_platform == 'win32'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ed/d3/c6c64067759e87af98cc668c1cc75171347d0f1577fab7ca3749134e3cd4/portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f", size = 40891 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/9b/fb/a70a4214956182e0d7a9099ab17d50bfcba1056188e9b14f35b9e2b62a0d/portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf", size = 18423 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pre-commit"
|
||||
version = "4.1.0"
|
||||
|
@ -2668,6 +2820,24 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/e3/fe/72e7e166bda3885810bee7b23049133e142f7c80c295bae02c562caeea16/pyzmq-26.2.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:bd8fdee945b877aa3bffc6a5a8816deb048dab0544f9df3731ecd0e54d8c84c9", size = 556563 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "qdrant-client"
|
||||
version = "1.13.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "grpcio" },
|
||||
{ name = "grpcio-tools" },
|
||||
{ name = "httpx", extra = ["http2"] },
|
||||
{ name = "numpy" },
|
||||
{ name = "portalocker" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "urllib3" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/eb/58/1e4acd7ff7637ed56a66e5044699e7af6067232703d0b34f05068fc6234b/qdrant_client-1.13.3.tar.gz", hash = "sha256:61ca09e07c6d7ac0dfbdeb13dca4fe5f3e08fa430cb0d74d66ef5d023a70adfc", size = 266278 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/dd/b4/bd676f91f5234ab59282e4a110f324029684482cbe08e7a1c77b6338013b/qdrant_client-1.13.3-py3-none-any.whl", hash = "sha256:f52cacbb936e547d3fceb1aaed3e3c56be0ebfd48e8ea495ea3dbc89c671d1d2", size = 306674 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rapidfuzz"
|
||||
version = "3.12.2"
|
||||
|
@ -3417,7 +3587,8 @@ source = { registry = "https://download.pytorch.org/whl/cpu" }
|
|||
resolution-markers = [
|
||||
"python_full_version < '3.11' and sys_platform == 'darwin'",
|
||||
"python_full_version == '3.11.*' and sys_platform == 'darwin'",
|
||||
"python_full_version >= '3.12' and sys_platform == 'darwin'",
|
||||
"python_full_version >= '3.13' and sys_platform == 'darwin'",
|
||||
"python_full_version == '3.12.*' and sys_platform == 'darwin'",
|
||||
]
|
||||
dependencies = [
|
||||
{ name = "filelock", marker = "sys_platform == 'darwin'" },
|
||||
|
@ -3444,8 +3615,10 @@ resolution-markers = [
|
|||
"python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"(python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
]
|
||||
dependencies = [
|
||||
{ name = "filelock", marker = "sys_platform != 'darwin'" },
|
||||
|
@ -3482,8 +3655,10 @@ resolution-markers = [
|
|||
"python_full_version < '3.11' and sys_platform == 'darwin'",
|
||||
"python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"python_full_version == '3.11.*' and sys_platform == 'darwin'",
|
||||
"python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"python_full_version >= '3.12' and sys_platform == 'darwin'",
|
||||
"python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"python_full_version >= '3.13' and sys_platform == 'darwin'",
|
||||
"python_full_version == '3.12.*' and sys_platform == 'darwin'",
|
||||
]
|
||||
dependencies = [
|
||||
{ name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" },
|
||||
|
@ -3509,7 +3684,8 @@ source = { registry = "https://download.pytorch.org/whl/cpu" }
|
|||
resolution-markers = [
|
||||
"(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"(python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
]
|
||||
dependencies = [
|
||||
{ name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue