Merge branch 'main' into feat/litellm_sambanova_usage

This commit is contained in:
Jorge Piedrahita Ortiz 2025-03-19 16:51:59 -05:00 committed by GitHub
commit 02a4f9ac59
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
69 changed files with 1128 additions and 445 deletions

View file

@ -1101,14 +1101,7 @@
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/Benchmark"
},
{
"type": "null"
}
]
"$ref": "#/components/schemas/Benchmark"
}
}
}
@ -1150,14 +1143,7 @@
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/Dataset"
},
{
"type": "null"
}
]
"$ref": "#/components/schemas/Dataset"
}
}
}
@ -1232,14 +1218,7 @@
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/Model"
},
{
"type": "null"
}
]
"$ref": "#/components/schemas/Model"
}
}
}
@ -1314,14 +1293,7 @@
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/ScoringFn"
},
{
"type": "null"
}
]
"$ref": "#/components/schemas/ScoringFn"
}
}
}
@ -1363,14 +1335,7 @@
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/Shield"
},
{
"type": "null"
}
]
"$ref": "#/components/schemas/Shield"
}
}
}
@ -1673,14 +1638,7 @@
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
},
{
"type": "null"
}
]
"$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
}
}
}
@ -1722,14 +1680,7 @@
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/PostTrainingJobStatusResponse"
},
{
"type": "null"
}
]
"$ref": "#/components/schemas/PostTrainingJobStatusResponse"
}
}
}
@ -1804,14 +1755,7 @@
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/FileUploadResponse"
},
{
"type": "null"
}
]
"$ref": "#/components/schemas/FileUploadResponse"
}
}
}
@ -1913,14 +1857,7 @@
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/VectorDB"
},
{
"type": "null"
}
]
"$ref": "#/components/schemas/VectorDB"
}
}
}
@ -2246,14 +2183,7 @@
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/JobStatus"
},
{
"type": "null"
}
]
"$ref": "#/components/schemas/JobStatus"
}
}
}
@ -4229,70 +4159,80 @@
]
},
"arguments": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
},
{
"type": "array",
"items": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
},
{
"type": "array",
"items": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
}
]
}
},
{
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
},
{
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
}
]
}
}
]
}
]
}
}
]
},
"arguments_json": {
"type": "string"
}
},
"additionalProperties": false,
@ -7857,7 +7797,8 @@
"type": "object",
"properties": {
"document_id": {
"type": "string"
"type": "string",
"description": "The unique identifier for the document."
},
"content": {
"oneOf": [
@ -7876,10 +7817,12 @@
{
"$ref": "#/components/schemas/URL"
}
]
],
"description": "The content of the document."
},
"mime_type": {
"type": "string"
"type": "string",
"description": "The MIME type of the document."
},
"metadata": {
"type": "object",
@ -7904,7 +7847,8 @@
"type": "object"
}
]
}
},
"description": "Additional metadata for the document."
}
},
"additionalProperties": false,
@ -7913,7 +7857,8 @@
"content",
"metadata"
],
"title": "RAGDocument"
"title": "RAGDocument",
"description": "A document to be used for document ingestion in the RAG Tool."
},
"InsertRequest": {
"type": "object",
@ -9917,23 +9862,6 @@
],
"title": "ScoreBatchResponse"
},
"AlgorithmConfig": {
"oneOf": [
{
"$ref": "#/components/schemas/LoraFinetuningConfig"
},
{
"$ref": "#/components/schemas/QATFinetuningConfig"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"LoRA": "#/components/schemas/LoraFinetuningConfig",
"QAT": "#/components/schemas/QATFinetuningConfig"
}
}
},
"LoraFinetuningConfig": {
"type": "object",
"properties": {
@ -10069,7 +9997,14 @@
"type": "string"
},
"algorithm_config": {
"$ref": "#/components/schemas/AlgorithmConfig"
"oneOf": [
{
"$ref": "#/components/schemas/LoraFinetuningConfig"
},
{
"$ref": "#/components/schemas/QATFinetuningConfig"
}
]
}
},
"additionalProperties": false,

View file

@ -757,9 +757,7 @@ paths:
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/Benchmark'
- type: 'null'
$ref: '#/components/schemas/Benchmark'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@ -787,9 +785,7 @@ paths:
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/Dataset'
- type: 'null'
$ref: '#/components/schemas/Dataset'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@ -840,9 +836,7 @@ paths:
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/Model'
- type: 'null'
$ref: '#/components/schemas/Model'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@ -893,9 +887,7 @@ paths:
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/ScoringFn'
- type: 'null'
$ref: '#/components/schemas/ScoringFn'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@ -923,9 +915,7 @@ paths:
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/Shield'
- type: 'null'
$ref: '#/components/schemas/Shield'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@ -1127,9 +1117,7 @@ paths:
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
- type: 'null'
$ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@ -1157,9 +1145,7 @@ paths:
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/PostTrainingJobStatusResponse'
- type: 'null'
$ref: '#/components/schemas/PostTrainingJobStatusResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@ -1210,9 +1196,7 @@ paths:
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/FileUploadResponse'
- type: 'null'
$ref: '#/components/schemas/FileUploadResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@ -1281,9 +1265,7 @@ paths:
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/VectorDB'
- type: 'null'
$ref: '#/components/schemas/VectorDB'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@ -1509,9 +1491,7 @@ paths:
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/JobStatus'
- type: 'null'
$ref: '#/components/schemas/JobStatus'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@ -2884,30 +2864,34 @@ components:
title: BuiltinTool
- type: string
arguments:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
- type: array
items:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
- type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
oneOf:
- type: string
- type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
- type: array
items:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
- type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
arguments_json:
type: string
additionalProperties: false
required:
- call_id
@ -5395,6 +5379,7 @@ components:
properties:
document_id:
type: string
description: The unique identifier for the document.
content:
oneOf:
- type: string
@ -5403,8 +5388,10 @@ components:
items:
$ref: '#/components/schemas/InterleavedContentItem'
- $ref: '#/components/schemas/URL'
description: The content of the document.
mime_type:
type: string
description: The MIME type of the document.
metadata:
type: object
additionalProperties:
@ -5415,12 +5402,15 @@ components:
- type: string
- type: array
- type: object
description: Additional metadata for the document.
additionalProperties: false
required:
- document_id
- content
- metadata
title: RAGDocument
description: >-
A document to be used for document ingestion in the RAG Tool.
InsertRequest:
type: object
properties:
@ -6698,15 +6688,6 @@ components:
required:
- results
title: ScoreBatchResponse
AlgorithmConfig:
oneOf:
- $ref: '#/components/schemas/LoraFinetuningConfig'
- $ref: '#/components/schemas/QATFinetuningConfig'
discriminator:
propertyName: type
mapping:
LoRA: '#/components/schemas/LoraFinetuningConfig'
QAT: '#/components/schemas/QATFinetuningConfig'
LoraFinetuningConfig:
type: object
properties:
@ -6790,7 +6771,9 @@ components:
checkpoint_dir:
type: string
algorithm_config:
$ref: '#/components/schemas/AlgorithmConfig'
oneOf:
- $ref: '#/components/schemas/LoraFinetuningConfig'
- $ref: '#/components/schemas/QATFinetuningConfig'
additionalProperties: false
required:
- job_uuid

View file

@ -12,7 +12,7 @@
from datetime import datetime
from pathlib import Path
import sys
import fire
import ruamel.yaml as yaml
@ -21,7 +21,7 @@ from llama_stack.distribution.stack import LlamaStack # noqa: E402
from .pyopenapi.options import Options # noqa: E402
from .pyopenapi.specification import Info, Server # noqa: E402
from .pyopenapi.utility import Specification # noqa: E402
from .pyopenapi.utility import Specification, validate_api_method_return_types # noqa: E402
def str_presenter(dumper, data):
@ -39,6 +39,14 @@ def main(output_dir: str):
if not output_dir.exists():
raise ValueError(f"Directory {output_dir} does not exist")
# Validate API protocols before generating spec
print("Validating API method return types...")
return_type_errors = validate_api_method_return_types()
if return_type_errors:
print("\nAPI Method Return Type Validation Errors:\n")
for error in return_type_errors:
print(error)
sys.exit(1)
now = str(datetime.now())
print(
"Converting the spec to YAML (openapi.yaml) and HTML (openapi.html) at " + now

View file

@ -6,16 +6,19 @@
import json
import typing
import inspect
import os
from pathlib import Path
from typing import TextIO
from typing import Any, Dict, List, Optional, Protocol, Type, Union, get_type_hints, get_origin, get_args
from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
from llama_stack.distribution.resolver import api_protocol_map
from .generator import Generator
from .options import Options
from .specification import Document
THIS_DIR = Path(__file__).parent
@ -114,3 +117,37 @@ class Specification:
)
f.write(html)
def is_optional_type(type_: Any) -> bool:
    """Check whether a type annotation is a union that includes ``None``.

    Recognizes ``Optional[X]``, ``Union[X, None]`` and the PEP 604 spelling
    ``X | None``.

    Args:
        type_: The annotation object to inspect (e.g. a resolved return hint).

    Returns:
        True if the annotation is a union with ``NoneType`` among its
        members, False otherwise (including for non-generic types).
    """
    # Imported locally so this function does not depend on a module-level import.
    import types

    origin = get_origin(type_)
    # get_origin() reports Optional[X] as Union, so there is no separate
    # "Optional" origin to test for. PEP 604 unions (X | None) report
    # types.UnionType on interpreters where it is distinct from typing.Union;
    # getattr keeps interpreters that lack types.UnionType working.
    pep604_union = getattr(types, "UnionType", Union)
    is_union = origin is Union or origin is pep604_union
    return is_union and type(None) in get_args(type_)
def validate_api_method_return_types() -> List[str]:
    """Collect return-annotation problems on GET web methods of the API protocols.

    Iterates over the entries returned by ``api_protocol_map()`` and inspects
    every function member that carries a ``__webmethod__`` attribute whose
    ``method`` equals ``"GET"``. Such a method is reported when it has no
    return annotation, or when ``is_optional_type`` says its return
    annotation is Optional.

    Returns:
        A list of human-readable error strings, in discovery order; empty
        when nothing was flagged.
    """
    problems: List[str] = []

    for protocol_name, protocol in api_protocol_map().items():
        for method_name, method in inspect.getmembers(protocol, predicate=inspect.isfunction):
            # Skip anything that is not a web-exposed GET method.
            if not hasattr(method, '__webmethod__') or method.__webmethod__.method != "GET":
                continue

            resolved_hints = get_type_hints(method)
            if 'return' in resolved_hints:
                if is_optional_type(resolved_hints['return']):
                    problems.append(f"Method {protocol_name}.{method_name} returns Optional type")
                continue

            problems.append(f"Method {protocol_name}.{method_name} has no return type annotation")

    return problems

View file

@ -15,8 +15,6 @@ Llama Stack defines and standardizes the core building blocks needed to bring ge
- **Multiple developer interfaces** like CLI and SDKs for Python, Node, iOS, and Android
- **Standalone applications** as examples for how to build production-grade AI applications with Llama Stack
We focus on making it easy to build production applications with the Llama model family - from the latest Llama 3.3 to specialized models like Llama Guard for safety.
```{image} ../_static/llama-stack.png
:alt: Llama Stack
:width: 400px

View file

@ -48,7 +48,7 @@ Llama Stack addresses these challenges through a service-oriented, API-first app
**Robust Ecosystem**
- Llama Stack is already integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies).
- Ecosystem offers tailored infrastructure, software, and services for deploying Llama models.
- Ecosystem offers tailored infrastructure, software, and services for deploying a variety of models.
### Our Philosophy
@ -57,7 +57,6 @@ Llama Stack addresses these challenges through a service-oriented, API-first app
- **Composability**: Every component is independent but works together seamlessly
- **Production Ready**: Built for real-world applications, not just demos
- **Turnkey Solutions**: Easy-to-deploy, built-in solutions for popular deployment scenarios
- **Llama First**: Explicit focus on Meta's Llama models and partnering ecosystem
With Llama Stack, you can focus on building your application while we handle the infrastructure complexity, essential capabilities, and provider integrations.

View file

@ -3,21 +3,36 @@ orphan: true
---
# Qdrant
[Qdrant](https://qdrant.tech/documentation/) is a remote vector database provider for Llama Stack. It
[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
allows you to store and query vectors directly in memory.
That means you'll get fast and efficient vector retrieval.
> By default, Qdrant stores vectors in RAM, delivering incredibly fast access for datasets that fit comfortably in
> memory. But when your dataset exceeds RAM capacity, Qdrant offers Memmap as an alternative.
>
> \[[An Introduction to Vector Databases](https://qdrant.tech/articles/what-is-a-vector-database/)\]
## Features
- Easy to use
- Lightweight and easy to use
- Fully integrated with Llama Stack
- Apache 2.0 license terms
- Store embeddings and their metadata
- Supports search by
[Keyword](https://qdrant.tech/articles/qdrant-introduces-full-text-filters-and-indexes/)
and [Hybrid](https://qdrant.tech/articles/hybrid-search/#building-a-hybrid-search-system-in-qdrant) search
- [Multilingual and Multimodal retrieval](https://qdrant.tech/documentation/multimodal-search/)
- [Metadata filtering](https://qdrant.tech/articles/vector-search-filtering/)
- [GPU support](https://qdrant.tech/documentation/guides/running-with-gpu/)
## Usage
To use Qdrant in your Llama Stack project, follow these steps:
1. Install the necessary dependencies.
2. Configure your Llama Stack project to use Faiss.
2. Configure your Llama Stack project to use Qdrant.
3. Start storing and querying vectors.
## Installation