forked from phoenix-oss/llama-stack-mirror

commit 46f2ba5910: Merge branch 'main' into eval_api_final
46 changed files with 1030 additions and 423 deletions

docs/_static/llama-stack-spec.html (vendored, 64 changes)
```diff
@@ -1126,6 +1126,7 @@
             "application/json": {
               "schema": {
                 "$ref": "#/components/schemas/Benchmark"
               }
             }
           }
@@ -1168,14 +1169,7 @@
           "content": {
             "application/json": {
               "schema": {
-                "oneOf": [
-                  {
-                    "$ref": "#/components/schemas/Dataset"
-                  },
-                  {
-                    "type": "null"
-                  }
-                ]
+                "$ref": "#/components/schemas/Dataset"
               }
             }
           }
@@ -1250,14 +1244,7 @@
           "content": {
             "application/json": {
               "schema": {
-                "oneOf": [
-                  {
-                    "$ref": "#/components/schemas/Model"
-                  },
-                  {
-                    "type": "null"
-                  }
-                ]
+                "$ref": "#/components/schemas/Model"
               }
             }
           }
@@ -1332,14 +1319,7 @@
           "content": {
             "application/json": {
               "schema": {
-                "oneOf": [
-                  {
-                    "$ref": "#/components/schemas/Shield"
-                  },
-                  {
-                    "type": "null"
-                  }
-                ]
+                "$ref": "#/components/schemas/Shield"
               }
             }
           }
@@ -1642,14 +1622,7 @@
           "content": {
             "application/json": {
               "schema": {
-                "oneOf": [
-                  {
-                    "$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
-                  },
-                  {
-                    "type": "null"
-                  }
-                ]
+                "$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
               }
             }
           }
@@ -1691,14 +1664,7 @@
           "content": {
             "application/json": {
               "schema": {
-                "oneOf": [
-                  {
-                    "$ref": "#/components/schemas/PostTrainingJobStatusResponse"
-                  },
-                  {
-                    "type": "null"
-                  }
-                ]
+                "$ref": "#/components/schemas/PostTrainingJobStatusResponse"
               }
             }
           }
@@ -1773,14 +1739,7 @@
           "content": {
             "application/json": {
              "schema": {
-                "oneOf": [
-                  {
-                    "$ref": "#/components/schemas/FileUploadResponse"
-                  },
-                  {
-                    "type": "null"
-                  }
-                ]
+                "$ref": "#/components/schemas/FileUploadResponse"
               }
             }
           }
@@ -1882,14 +1841,7 @@
           "content": {
             "application/json": {
               "schema": {
-                "oneOf": [
-                  {
-                    "$ref": "#/components/schemas/VectorDB"
-                  },
-                  {
-                    "type": "null"
-                  }
-                ]
+                "$ref": "#/components/schemas/VectorDB"
               }
             }
           }
```

docs/_static/llama-stack-spec.yaml (vendored, 29 changes)
```diff
@@ -774,6 +774,7 @@ paths:
            application/json:
              schema:
                $ref: '#/components/schemas/Benchmark'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@@ -802,9 +803,7 @@ paths:
          content:
            application/json:
              schema:
-               oneOf:
-                 - $ref: '#/components/schemas/Dataset'
-                 - type: 'null'
+               $ref: '#/components/schemas/Dataset'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@@ -855,9 +854,7 @@ paths:
          content:
            application/json:
              schema:
-               oneOf:
-                 - $ref: '#/components/schemas/Model'
-                 - type: 'null'
+               $ref: '#/components/schemas/Model'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@@ -908,9 +905,7 @@ paths:
          content:
            application/json:
              schema:
-               oneOf:
-                 - $ref: '#/components/schemas/Shield'
-                 - type: 'null'
+               $ref: '#/components/schemas/Shield'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@@ -1112,9 +1107,7 @@ paths:
          content:
            application/json:
              schema:
-               oneOf:
-                 - $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
-                 - type: 'null'
+               $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@@ -1142,9 +1135,7 @@ paths:
          content:
            application/json:
              schema:
-               oneOf:
-                 - $ref: '#/components/schemas/PostTrainingJobStatusResponse'
-                 - type: 'null'
+               $ref: '#/components/schemas/PostTrainingJobStatusResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@@ -1195,9 +1186,7 @@ paths:
          content:
            application/json:
              schema:
-               oneOf:
-                 - $ref: '#/components/schemas/FileUploadResponse'
-                 - type: 'null'
+               $ref: '#/components/schemas/FileUploadResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@@ -1266,9 +1255,7 @@ paths:
          content:
            application/json:
              schema:
-               oneOf:
-                 - $ref: '#/components/schemas/VectorDB'
-                 - type: 'null'
+               $ref: '#/components/schemas/VectorDB'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
```
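
Taken together, these spec changes drop the nullable `oneOf: [$ref, type: 'null']` wrapper from every GET response, which mirrors the Python-side rule enforced by the new validator below: a GET method may no longer be annotated as returning `Optional`. A minimal sketch of that correspondence (illustrative only, not generator code):

```python
from typing import Optional, Union, get_args, get_origin

# Optional[Dataset] is just Union[Dataset, None]; a schema generator that
# walks the annotation sees the NoneType member and emits a nullable schema:
#   oneOf: [$ref: '#/components/schemas/Dataset', type: 'null']
# Annotating the method as returning Dataset alone yields the plain $ref.
assert Optional[int] == Union[int, None]
assert get_origin(Optional[int]) is Union
assert type(None) in get_args(Optional[int])
```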

docs/openapi_generator/generate.py
```diff
@@ -12,7 +12,7 @@
 from datetime import datetime
 from pathlib import Path

+import sys
 import fire
 import ruamel.yaml as yaml

@@ -21,7 +21,7 @@ from llama_stack.distribution.stack import LlamaStack  # noqa: E402

 from .pyopenapi.options import Options  # noqa: E402
 from .pyopenapi.specification import Info, Server  # noqa: E402
-from .pyopenapi.utility import Specification  # noqa: E402
+from .pyopenapi.utility import Specification, validate_api_method_return_types  # noqa: E402


 def str_presenter(dumper, data):
@@ -39,6 +39,14 @@ def main(output_dir: str):
     if not output_dir.exists():
         raise ValueError(f"Directory {output_dir} does not exist")

+    # Validate API protocols before generating spec
+    print("Validating API method return types...")
+    return_type_errors = validate_api_method_return_types()
+    if return_type_errors:
+        print("\nAPI Method Return Type Validation Errors:\n")
+        for error in return_type_errors:
+            print(error)
+        sys.exit(1)
+
     now = str(datetime.now())
     print(
         "Converting the spec to YAML (openapi.yaml) and HTML (openapi.html) at " + now
```
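
The new block in `main()` is a fail-fast gate: spec generation aborts before any output is written if a GET method still advertises an `Optional` return. A standalone sketch of the same pattern, with a stub standing in for the real validator:

```python
import sys

def validate() -> list[str]:
    """Stub standing in for validate_api_method_return_types()."""
    return ["Method Datasets.get_dataset returns Optional type"]  # hypothetical finding

errors = validate()
if errors:
    # Report every violation, then exit non-zero so CI fails the build.
    print("\nAPI Method Return Type Validation Errors:\n")
    for error in errors:
        print(error)
    sys.exit(1)
```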

docs/openapi_generator/pyopenapi/utility.py
```diff
@@ -6,16 +6,19 @@
 import json
 import typing
+import inspect
 import os
 from pathlib import Path
 from typing import TextIO
+from typing import Any, Dict, List, Optional, Protocol, Type, Union, get_type_hints, get_origin, get_args

 from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
+from llama_stack.distribution.resolver import api_protocol_map

 from .generator import Generator
 from .options import Options
 from .specification import Document


 THIS_DIR = Path(__file__).parent
@@ -114,3 +117,37 @@ class Specification:
         )

         f.write(html)
+
+
+def is_optional_type(type_: Any) -> bool:
+    """Check if a type is Optional."""
+    origin = get_origin(type_)
+    args = get_args(type_)
+    return origin is Optional or (origin is Union and type(None) in args)
+
+
+def validate_api_method_return_types() -> List[str]:
+    """Validate that all API methods have proper return types."""
+    errors = []
+    protocols = api_protocol_map()
+
+    for protocol_name, protocol in protocols.items():
+        methods = inspect.getmembers(protocol, predicate=inspect.isfunction)
+
+        for method_name, method in methods:
+            if not hasattr(method, '__webmethod__'):
+                continue
+
+            # Only check GET methods
+            if method.__webmethod__.method != "GET":
+                continue
+
+            hints = get_type_hints(method)
+
+            if 'return' not in hints:
+                errors.append(f"Method {protocol_name}.{method_name} has no return type annotation")
+            else:
+                return_type = hints['return']
+                if is_optional_type(return_type):
+                    errors.append(f"Method {protocol_name}.{method_name} returns Optional type")
+
+    return errors
```
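
To see what this validator catches, here is a self-contained sketch; the `webmethod` decorator and `Datasets` protocol are toy stand-ins, not the real `llama_stack` definitions. One observation: in the committed `is_optional_type`, the `origin is Optional` clause never fires, since `get_origin(Optional[X])` reports `Union`; the `Union` clause does the actual work, as below.

```python
import inspect
from dataclasses import dataclass
from typing import Any, List, Optional, Protocol, Union, get_args, get_origin, get_type_hints

@dataclass
class WebMethod:
    route: str
    method: str

def webmethod(route: str, method: str):
    """Toy decorator that tags a function the way llama_stack marks endpoints."""
    def wrap(fn):
        fn.__webmethod__ = WebMethod(route, method)
        return fn
    return wrap

class Datasets(Protocol):
    @webmethod(route="/datasets/{id}", method="GET")
    def get_dataset(self, dataset_id: str) -> Optional[dict]: ...  # flagged: Optional

    @webmethod(route="/datasets", method="GET")
    def list_datasets(self): ...  # flagged: no return annotation

def is_optional_type(type_: Any) -> bool:
    return get_origin(type_) is Union and type(None) in get_args(type_)

errors: List[str] = []
for name, method in inspect.getmembers(Datasets, predicate=inspect.isfunction):
    wm = getattr(method, "__webmethod__", None)
    if wm is None or wm.method != "GET":
        continue  # skip Protocol machinery and non-GET endpoints
    hints = get_type_hints(method)
    if "return" not in hints:
        errors.append(f"Method Datasets.{name} has no return type annotation")
    elif is_optional_type(hints["return"]):
        errors.append(f"Method Datasets.{name} returns Optional type")

print("\n".join(errors))  # both toy methods are reported
```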

docs: building a Llama Stack distribution (guide)
````diff
@@ -185,8 +185,12 @@ llama stack build --config llama_stack/templates/ollama/build.yaml
 :::

 :::{tab-item} Building Container
-> [!TIP]
-> Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podman` in your environment to use Podman.
+
+```{admonition} Podman Alternative
+:class: tip
+
+Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podman` in your environment to use Podman.
+```

 To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type.
````
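
For reference, the documented switch can be exercised non-interactively. A hedged sketch, assuming the `llama` CLI is on PATH and using the build config shown in this hunk's context line:

```python
import os
import subprocess

# Select podman instead of docker by exporting CONTAINER_BINARY,
# as the admonition above describes, then run the container build.
env = {**os.environ, "CONTAINER_BINARY": "podman"}
subprocess.run(
    ["llama", "stack", "build",
     "--config", "llama_stack/templates/ollama/build.yaml",
     "--image-type", "container"],
    env=env,
    check=True,
)
```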
docs: Llama Stack Playground guide

````diff
@@ -92,6 +92,8 @@ Interactive pages for users to play with and explore Llama Stack API capabilities.

 ## Starting the Llama Stack Playground

+### Llama CLI
+
 To start the Llama Stack Playground, run the following commands:

 1. Start up the Llama Stack API server
@@ -107,3 +109,28 @@ cd llama_stack/distribution/ui
 pip install -r requirements.txt
 streamlit run app.py
 ```
+
+### Docker
+
+Playground can also be started in a docker image:
+
+```sh
+export LLAMA_STACK_URL=http://localhost:11434
+
+docker run \
+  -p 8501:8501 \
+  -e LLAMA_STACK_ENDPOINT=$LLAMA_STACK_URL \
+  quay.io/jland/llama-stack-playground
+```
+
+## Environment Variables
+
+| Environment Variable | Description                      | Default Value         |
+|----------------------|----------------------------------|-----------------------|
+| LLAMA_STACK_ENDPOINT | The endpoint for the Llama Stack | http://localhost:8321 |
+| FIREWORKS_API_KEY    | API key for Fireworks provider   | (empty string)        |
+| TOGETHER_API_KEY     | API key for Together provider    | (empty string)        |
+| SAMBANOVA_API_KEY    | API key for SambaNova provider   | (empty string)        |
+| OPENAI_API_KEY       | API key for OpenAI provider      | (empty string)        |
````
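
The defaults in the new table translate directly into how an app reads its configuration. A sketch of that pattern (the variable names and defaults come from the table; the reading code itself is illustrative, not the playground source):

```python
import os

# Endpoint falls back to the local Llama Stack server; provider API keys
# fall back to empty strings, matching the documented defaults.
endpoint = os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321")
provider_keys = {
    name: os.environ.get(name, "")
    for name in ("FIREWORKS_API_KEY", "TOGETHER_API_KEY",
                 "SAMBANOVA_API_KEY", "OPENAI_API_KEY")
}
```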
docs: Qdrant vector database provider page

```diff
@@ -3,21 +3,36 @@ orphan: true
 ---
 # Qdrant

-[Qdrant](https://qdrant.tech/documentation/) is a remote vector database provider for Llama Stack. It
+[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
 allows you to store and query vectors directly in memory.
 That means you'll get fast and efficient vector retrieval.

+> By default, Qdrant stores vectors in RAM, delivering incredibly fast access for datasets that fit comfortably in
+> memory. But when your dataset exceeds RAM capacity, Qdrant offers Memmap as an alternative.
+>
+> \[[An Introduction to Vector Databases](https://qdrant.tech/articles/what-is-a-vector-database/)\]
+
 ## Features

-- Easy to use
+- Lightweight and easy to use
 - Fully integrated with Llama Stack
+- Apache 2.0 license terms
+- Store embeddings and their metadata
+- Supports search by
+  [Keyword](https://qdrant.tech/articles/qdrant-introduces-full-text-filters-and-indexes/)
+  and [Hybrid](https://qdrant.tech/articles/hybrid-search/#building-a-hybrid-search-system-in-qdrant) search
+- [Multilingual and Multimodal retrieval](https://qdrant.tech/documentation/multimodal-search/)
+- [Metadata filtering](https://qdrant.tech/articles/vector-search-filtering/)
+- [GPU support](https://qdrant.tech/documentation/guides/running-with-gpu/)

 ## Usage

 To use Qdrant in your Llama Stack project, follow these steps:

 1. Install the necessary dependencies.
-2. Configure your Llama Stack project to use Faiss.
+2. Configure your Llama Stack project to use Qdrant.
 3. Start storing and querying vectors.

 ## Installation
```
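
The "store and query vectors directly in memory" claim is easy to try with the standalone `qdrant-client` package; this exercises Qdrant itself rather than the Llama Stack provider wiring, and the collection name, vector size, and payload are illustrative:

```python
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams

# ":memory:" runs Qdrant fully in-process, matching the inline mode described above.
client = QdrantClient(":memory:")
client.create_collection(
    collection_name="demo",
    vectors_config=VectorParams(size=4, distance=Distance.COSINE),
)
client.upsert(
    collection_name="demo",
    points=[PointStruct(id=1, vector=[0.1, 0.2, 0.3, 0.4], payload={"doc": "hello"})],
)
hits = client.search(collection_name="demo", query_vector=[0.1, 0.2, 0.3, 0.4], limit=1)
print(hits[0].payload)  # {'doc': 'hello'}
```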