Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-27 18:50:41 +00:00)

Merge branch 'main' into fix-urllib3

Commit 9410d8f7a4: 23 changed files with 335 additions and 85 deletions
.github/workflows/integration-vector-io-tests.yml (new file, vendored, +142 lines)

@@ -0,0 +1,142 @@
name: Vector IO Integration Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'llama_stack/**'
      - 'tests/integration/vector_io/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/integration-vector-io-tests.yml' # This workflow

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-matrix:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "remote::chromadb", "remote::pgvector"]
        python-version: ["3.12", "3.13"]
      fail-fast: false # we want to run all tests regardless of failure

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner
        with:
          python-version: ${{ matrix.python-version }}

      - name: Setup Chroma
        if: matrix.vector-io-provider == 'remote::chromadb'
        run: |
          docker run --rm -d --pull always \
            --name chromadb \
            -p 8000:8000 \
            -v ~/chroma:/chroma/chroma \
            -e IS_PERSISTENT=TRUE \
            -e ANONYMIZED_TELEMETRY=FALSE \
            chromadb/chroma:latest

      - name: Start PGVector DB
        if: matrix.vector-io-provider == 'remote::pgvector'
        run: |
          docker run -d \
            --name pgvector \
            -e POSTGRES_USER=llamastack \
            -e POSTGRES_PASSWORD=llamastack \
            -e POSTGRES_DB=llamastack \
            -p 5432:5432 \
            pgvector/pgvector:pg17

      - name: Wait for PGVector to be ready
        if: matrix.vector-io-provider == 'remote::pgvector'
        run: |
          echo "Waiting for Postgres to be ready..."
          for i in {1..30}; do
            if docker exec pgvector pg_isready -U llamastack > /dev/null 2>&1; then
              echo "Postgres is ready!"
              break
            fi
            echo "Not ready yet... ($i)"
            sleep 1
          done

      - name: Enable pgvector extension
        if: matrix.vector-io-provider == 'remote::pgvector'
        run: |
          PGPASSWORD=llamastack psql -h localhost -U llamastack -d llamastack \
            -c "CREATE EXTENSION IF NOT EXISTS vector;"

      - name: Wait for ChromaDB to be ready
        if: matrix.vector-io-provider == 'remote::chromadb'
        run: |
          echo "Waiting for ChromaDB to be ready..."
          for i in {1..30}; do
            if curl -s http://localhost:8000/api/v2/heartbeat | grep -q "nanosecond heartbeat"; then
              echo "ChromaDB is ready!"
              exit 0
            fi
            sleep 2
          done
          echo "ChromaDB failed to start"
          docker logs chromadb
          exit 1

      - name: Build Llama Stack
        run: |
          uv run llama stack build --template starter --image-type venv

      - name: Check Storage and Memory Available Before Tests
        if: ${{ always() }}
        run: |
          free -h
          df -h

      - name: Run Vector IO Integration Tests
        env:
          ENABLE_CHROMADB: ${{ matrix.vector-io-provider == 'remote::chromadb' && 'true' || '' }}
          CHROMADB_URL: ${{ matrix.vector-io-provider == 'remote::chromadb' && 'http://localhost:8000' || '' }}
          ENABLE_PGVECTOR: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'true' || '' }}
          PGVECTOR_HOST: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'localhost' || '' }}
          PGVECTOR_PORT: ${{ matrix.vector-io-provider == 'remote::pgvector' && '5432' || '' }}
          PGVECTOR_DB: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
          PGVECTOR_USER: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
          PGVECTOR_PASSWORD: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
        run: |
          uv run pytest -sv --stack-config="inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
            tests/integration/vector_io \
            --embedding-model all-MiniLM-L6-v2

      - name: Check Storage and Memory Available After Tests
        if: ${{ always() }}
        run: |
          free -h
          df -h

      - name: Create sanitized provider name
        if: ${{ always() }}
        run: |
          echo "SANITIZED_PROVIDER=$(echo "${{ matrix.vector-io-provider }}" | tr ':' '_')" >> $GITHUB_ENV

      - name: Write ChromaDB logs to file
        if: ${{ always() && matrix.vector-io-provider == 'remote::chromadb' }}
        run: |
          docker logs chromadb > chromadb.log

      - name: Upload all logs to artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: vector-io-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ env.SANITIZED_PROVIDER }}-${{ matrix.python-version }}
          path: |
            *.log
          retention-days: 1
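A note on the two readiness steps in the workflow above: both are the same bounded poll with a fixed delay. A minimal Python sketch of that pattern, reusing the heartbeat URL and expected response from the Chroma step (the wait_for_service helper and its defaults are ours, not part of the workflow):

import time
import urllib.request


def wait_for_service(url: str, expect: str, attempts: int = 30, delay: float = 2.0) -> bool:
    """Poll `url` until the response body contains `expect`, or give up."""
    for i in range(1, attempts + 1):
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if expect in resp.read().decode("utf-8", errors="replace"):
                    return True
        except OSError:
            pass  # connection refused or timed out; the service is not up yet
        print(f"Not ready yet... ({i})")
        time.sleep(delay)
    return False


if __name__ == "__main__":
    ok = wait_for_service("http://localhost:8000/api/v2/heartbeat", "nanosecond heartbeat")
    raise SystemExit(0 if ok else 1)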
CHANGELOG.md (70 lines changed)

@@ -1,5 +1,28 @@
# Changelog

# v0.2.12
Published on: 2025-06-20T22:52:12Z

## Highlights
* Filter support in file search
* Support auth attributes in inference and response stores

---

# v0.2.11
Published on: 2025-06-17T20:26:26Z

## Highlights
* OpenAI-compatible vector store APIs
* Hybrid Search in Sqlite-vec
* File search tool in Responses API
* Pagination in inference and response stores
* Added `suffix` to completions API for fill-in-the-middle tasks

---

# v0.2.10.1
Published on: 2025-06-06T20:11:02Z


@@ -481,51 +504,4 @@ Published on: 2024-11-23T17:14:07Z

---

# v0.0.54
Published on: 2024-11-22T00:36:09Z

---

# v0.0.53
Published on: 2024-11-20T22:18:00Z

🚀 Initial Release Notes for Llama Stack!

### Added
- Resource-oriented design for models, shields, memory banks, datasets and eval tasks
- Persistence for registered objects with distribution
- Ability to persist memory banks created for FAISS
- PostgreSQL KVStore implementation
- Environment variable placeholder support in run.yaml files
- Comprehensive Zero-to-Hero notebooks and quickstart guides
- Support for quantized models in Ollama
- Vision model support for Together, Fireworks, Meta-Reference, Ollama, and vLLM
- Bedrock distribution with safety shields support
- Evals API with task registration and scoring functions
- MMLU and SimpleQA benchmark scoring functions
- Huggingface dataset provider integration for benchmarks
- Support for custom dataset registration from local paths
- Benchmark evaluation CLI tools with visualization tables
- RAG evaluation scoring functions and metrics
- Local persistence for datasets and eval tasks

### Changed
- Split safety into distinct providers (llama-guard, prompt-guard, code-scanner)
- Changed provider naming convention (`impls` → `inline`, `adapters` → `remote`)
- Updated API signatures for dataset and eval task registration
- Restructured folder organization for providers
- Enhanced Docker build configuration
- Added version prefixing for REST API routes
- Enhanced evaluation task registration workflow
- Improved benchmark evaluation output formatting
- Restructured evals folder organization for better modularity

### Removed
- `llama stack configure` command

---
@@ -146,7 +146,9 @@ in the runtime configuration to help route to the correct provider.""",
 
 
 class Provider(BaseModel):
-    provider_id: str
+    # provider_id of None means that the provider is not enabled - this happens
+    # when the provider is enabled via a conditional environment variable
+    provider_id: str | None
     provider_type: str
     config: dict[str, Any]
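The comment above documents the new convention: provider_id may be None when its value came from a conditional env substitution that resolved to nothing. The diff does not show where an empty substitution result is normalized to None, so as a hedged illustration of the convention only (ProviderSketch and its validator are hypothetical, not the stack's loader):

from typing import Any

from pydantic import BaseModel, field_validator


class ProviderSketch(BaseModel):
    # None means "not enabled": the id template (e.g. ${env.ENABLE_SQLITE_VEC:+sqlite-vec})
    # resolved to nothing because the variable was unset.
    provider_id: str | None
    provider_type: str
    config: dict[str, Any]

    @field_validator("provider_id", mode="before")
    @classmethod
    def empty_id_means_disabled(cls, v: Any) -> Any:
        return None if v == "" else v


p = ProviderSketch(provider_id="", provider_type="inline::sqlite-vec", config={})
assert p.provider_id is None  # callers must skip this provider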
@@ -48,6 +48,9 @@ class ProviderImpl(Providers):
         ret = []
         for api, providers in safe_config.providers.items():
             for p in providers:
+                # Skip providers that are not enabled
+                if p.provider_id is None:
+                    continue
                 ret.append(
                     ProviderInfo(
                         api=api,
@@ -255,6 +255,10 @@ async def instantiate_providers(
     impls: dict[Api, Any] = {}
     inner_impls_by_provider_id: dict[str, dict[str, Any]] = {f"inner-{x.value}": {} for x in router_apis}
     for api_str, provider in sorted_providers:
+        # Skip providers that are not enabled
+        if provider.provider_id is None:
+            continue
+
         deps = {a: impls[a] for a in provider.spec.api_dependencies}
         for a in provider.spec.optional_api_dependencies:
            if a in impls:
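Both guards above apply the same rule: anything whose provider_id is None is skipped wherever providers are enumerated. A small self-contained sketch of that filter (the enabled_only helper is ours, not in the diff):

from typing import Any, Iterable


def enabled_only(providers: Iterable[Any]) -> list[Any]:
    """Keep only providers that are enabled, i.e. have a concrete provider_id."""
    return [p for p in providers if p.provider_id is not None]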
@@ -137,6 +137,9 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
 
     async def initialize(self) -> None:
         if isinstance(self.config, RemoteChromaVectorIOConfig):
+            if not self.config.url:
+                raise ValueError("URL is a required parameter for the remote Chroma provider's config")
+
             log.info(f"Connecting to Chroma server at: {self.config.url}")
             url = self.config.url.rstrip("/")
             parsed = urlparse(url)
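Because url becomes optional on the config (next hunk), the adapter now validates it at initialize() time instead of at parse time. A reduced sketch of that fail-fast pattern, with illustrative names rather than the adapter's own:

class RemoteConfigSketch:
    """Stand-in for a config whose url may be left unset by env substitution."""

    def __init__(self, url: str | None):
        self.url = url


def connect(config: RemoteConfigSketch) -> str:
    # Check the optional field before first use, as initialize() does above.
    if not config.url:
        raise ValueError("URL is a required parameter for the remote Chroma provider's config")
    return config.url.rstrip("/")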
@@ -10,7 +10,7 @@ from pydantic import BaseModel
 
 
 class ChromaVectorIOConfig(BaseModel):
-    url: str
+    url: str | None
 
     @classmethod
     def sample_run_config(cls, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> dict[str, Any]:
@@ -13,11 +13,11 @@ from llama_stack.schema_utils import json_schema_type
 
 @json_schema_type
 class PGVectorVectorIOConfig(BaseModel):
-    host: str = Field(default="localhost")
-    port: int = Field(default=5432)
-    db: str = Field(default="postgres")
-    user: str = Field(default="postgres")
-    password: str = Field(default="mysecretpassword")
+    host: str | None = Field(default="localhost")
+    port: int | None = Field(default=5432)
+    db: str | None = Field(default="postgres")
+    user: str | None = Field(default="postgres")
+    password: str | None = Field(default="mysecretpassword")
 
     @classmethod
     def sample_run_config(
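One subtlety of the `str | None` plus Field(default=...) combination above: the default still applies when the key is simply omitted; None is only stored when a null is passed explicitly (for example, from an unset env placeholder). A quick self-contained check (the Cfg model is illustrative):

from pydantic import BaseModel, Field


class Cfg(BaseModel):
    host: str | None = Field(default="localhost")


print(Cfg().host)           # "localhost": the default applies when the key is omitted
print(Cfg(host=None).host)  # None: an explicit null overrides the default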
@@ -15,7 +15,21 @@ from pydantic import BaseModel, TypeAdapter
 
 from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
+from llama_stack.apis.vector_io import (
+    Chunk,
+    QueryChunksResponse,
+    SearchRankingOptions,
+    VectorIO,
+    VectorStoreChunkingStrategy,
+    VectorStoreDeleteResponse,
+    VectorStoreFileContentsResponse,
+    VectorStoreFileObject,
+    VectorStoreFileStatus,
+    VectorStoreListFilesResponse,
+    VectorStoreListResponse,
+    VectorStoreObject,
+    VectorStoreSearchResponsePage,
+)
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
 from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
@@ -222,3 +236,108 @@ class PGVectorVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn)
         self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api)
         return self.cache[vector_db_id]
+
+    async def openai_create_vector_store(
+        self,
+        name: str,
+        file_ids: list[str] | None = None,
+        expires_after: dict[str, Any] | None = None,
+        chunking_strategy: dict[str, Any] | None = None,
+        metadata: dict[str, Any] | None = None,
+        embedding_model: str | None = None,
+        embedding_dimension: int | None = 384,
+        provider_id: str | None = None,
+        provider_vector_db_id: str | None = None,
+    ) -> VectorStoreObject:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_list_vector_stores(
+        self,
+        limit: int | None = 20,
+        order: str | None = "desc",
+        after: str | None = None,
+        before: str | None = None,
+    ) -> VectorStoreListResponse:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_retrieve_vector_store(
+        self,
+        vector_store_id: str,
+    ) -> VectorStoreObject:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_update_vector_store(
+        self,
+        vector_store_id: str,
+        name: str | None = None,
+        expires_after: dict[str, Any] | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> VectorStoreObject:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_delete_vector_store(
+        self,
+        vector_store_id: str,
+    ) -> VectorStoreDeleteResponse:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_search_vector_store(
+        self,
+        vector_store_id: str,
+        query: str | list[str],
+        filters: dict[str, Any] | None = None,
+        max_num_results: int | None = 10,
+        ranking_options: SearchRankingOptions | None = None,
+        rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
+    ) -> VectorStoreSearchResponsePage:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_attach_file_to_vector_store(
+        self,
+        vector_store_id: str,
+        file_id: str,
+        attributes: dict[str, Any] | None = None,
+        chunking_strategy: VectorStoreChunkingStrategy | None = None,
+    ) -> VectorStoreFileObject:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_list_files_in_vector_store(
+        self,
+        vector_store_id: str,
+        limit: int | None = 20,
+        order: str | None = "desc",
+        after: str | None = None,
+        before: str | None = None,
+        filter: VectorStoreFileStatus | None = None,
+    ) -> VectorStoreListFilesResponse:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_retrieve_vector_store_file(
+        self,
+        vector_store_id: str,
+        file_id: str,
+    ) -> VectorStoreFileObject:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_retrieve_vector_store_file_contents(
+        self,
+        vector_store_id: str,
+        file_id: str,
+    ) -> VectorStoreFileContentsResponse:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_update_vector_store_file(
+        self,
+        vector_store_id: str,
+        file_id: str,
+        attributes: dict[str, Any] | None = None,
+    ) -> VectorStoreFileObject:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_delete_vector_store_file(
+        self,
+        vector_store_id: str,
+        file_id: str,
+    ) -> VectorStoreFileObject:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
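All of these stubs exist because the adapter must still expose the full VectorIO surface even though PGVector does not implement the OpenAI-compatible vector store endpoints; raising NotImplementedError gives callers an immediate, provider-specific failure rather than an obscure one later. A reduced sketch of the pattern (class and method names here are illustrative, not the adapter's):

import asyncio


class VectorStoreAPISketch:
    async def openai_retrieve_vector_store(self, vector_store_id: str) -> None:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")


async def main() -> None:
    api = VectorStoreAPISketch()
    try:
        await api.openai_retrieve_vector_store("vs_123")
    except NotImplementedError as exc:
        print(exc)  # the caller sees exactly which capability is missing


asyncio.run(main())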
@@ -24,7 +24,7 @@ providers:
   - provider_id: ollama
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:http://localhost:11434}
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -32,7 +32,7 @@ providers:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
   scoring:
   - provider_id: basic
     provider_type: inline::basic
@@ -40,7 +40,7 @@ providers:
   - provider_id: braintrust
     provider_type: inline::braintrust
     config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
+      openai_api_key: ${env.OPENAI_API_KEY:+}
   datasetio:
   - provider_id: localfs
     provider_type: inline::localfs
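The recurring change across these config hunks is the placeholder syntax: `${env.VAR:default}` becomes the bash-style `${env.VAR:=default}` (substitute a default when the variable is unset or empty), and `${env.VAR:}` becomes `${env.VAR:+value}` (produce a value only when the variable is set). The real resolver lives in llama-stack's config loading and is not shown in this diff; a simplified Python sketch of the assumed semantics:

import os
import re

# ${env.VAR:=default} -> value of VAR, or "default" when VAR is unset/empty
# ${env.VAR:+value}   -> "value" when VAR is set and non-empty, else ""
_PLACEHOLDER = re.compile(r"\$\{env\.(\w+):([=+])([^}]*)\}")


def resolve(template: str) -> str:
    def repl(match: re.Match) -> str:
        var, op, arg = match.groups()
        val = os.environ.get(var, "")
        if op == "=":
            return val if val else arg
        return arg if val else ""

    return _PLACEHOLDER.sub(repl, template)


os.environ.pop("OLLAMA_URL", None)
print(resolve("${env.OLLAMA_URL:=http://localhost:11434}"))  # http://localhost:11434

os.environ["ENABLE_SQLITE_VEC"] = "1"
print(resolve("${env.ENABLE_SQLITE_VEC:+sqlite-vec}"))  # "sqlite-vec" (used later in this diff)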
@@ -48,14 +48,14 @@ providers:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/localfs_datasetio.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/localfs_datasetio.db
   - provider_id: huggingface
     provider_type: remote::huggingface
     config:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/huggingface}/huggingface_datasetio.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/huggingface}/huggingface_datasetio.db
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -74,7 +74,7 @@ providers:
       persistence_store:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/agents_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/agents_store.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -86,19 +86,19 @@ providers:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/faiss_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/faiss_store.db
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
     config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      api_key: ${env.BRAVE_SEARCH_API_KEY:+}
       max_results: 3
 
 metadata_store:
   namespace: null
   type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/registry.db
 models: []
 shields: []
 vector_dbs: []
@@ -46,7 +46,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="inline::meta-reference",
         config=MetaReferenceInferenceConfig.sample_run_config(
             model="${env.INFERENCE_MODEL}",
-            checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
+            checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:=null}",
         ),
     )
     embedding_provider = Provider(
@@ -112,7 +112,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_type="inline::meta-reference",
             config=MetaReferenceInferenceConfig.sample_run_config(
                 model="${env.SAFETY_MODEL}",
-                checkpoint_dir="${env.SAFETY_CHECKPOINT_DIR:null}",
+                checkpoint_dir="${env.SAFETY_CHECKPOINT_DIR:=null}",
             ),
         ),
     ],
@@ -16,7 +16,7 @@ providers:
     provider_type: inline::meta-reference
     config:
       model: ${env.INFERENCE_MODEL}
-      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
+      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null}
       quantization:
         type: ${env.QUANTIZATION_TYPE:=bf16}
       model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
@@ -29,7 +29,7 @@ providers:
     provider_type: inline::meta-reference
     config:
       model: ${env.SAFETY_MODEL}
-      checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
+      checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:=null}
       quantization:
         type: ${env.QUANTIZATION_TYPE:=bf16}
       model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
@@ -16,7 +16,7 @@ providers:
     provider_type: inline::meta-reference
     config:
       model: ${env.INFERENCE_MODEL}
-      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
+      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null}
       quantization:
         type: ${env.QUANTIZATION_TYPE:=bf16}
       model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
@@ -46,7 +46,7 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
                 model_type=ModelType.llm,
             )
         ],
-        OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:}"),
+        OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:+}"),
     ),
     (
         "anthropic",
@@ -56,7 +56,7 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
                 model_type=ModelType.llm,
             )
         ],
-        AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:}"),
+        AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:+}"),
     ),
     (
         "gemini",
@@ -66,17 +66,17 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
                 model_type=ModelType.llm,
             )
         ],
-        GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:}"),
+        GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:+}"),
     ),
     (
         "groq",
         [],
-        GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"),
+        GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:+}"),
     ),
     (
         "together",
         [],
-        TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"),
+        TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:+}"),
     ),
 ]
 inference_providers = []
@@ -15,20 +15,20 @@ providers:
   - provider_id: openai
     provider_type: remote::openai
     config:
-      api_key: ${env.OPENAI_API_KEY:}
+      api_key: ${env.OPENAI_API_KEY:+}
   - provider_id: anthropic
     provider_type: remote::anthropic
     config:
-      api_key: ${env.ANTHROPIC_API_KEY:}
+      api_key: ${env.ANTHROPIC_API_KEY:+}
   - provider_id: gemini
     provider_type: remote::gemini
     config:
-      api_key: ${env.GEMINI_API_KEY:}
+      api_key: ${env.GEMINI_API_KEY:+}
   - provider_id: groq
     provider_type: remote::groq
     config:
       url: https://api.groq.com
-      api_key: ${env.GROQ_API_KEY:}
+      api_key: ${env.GROQ_API_KEY:+}
   - provider_id: together
     provider_type: remote::together
     config:
@@ -29,7 +29,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_id="vllm-inference",
             provider_type="remote::vllm",
             config=VLLMInferenceAdapterConfig.sample_run_config(
-                url="${env.VLLM_URL:http://localhost:8000/v1}",
+                url="${env.VLLM_URL:=http://localhost:8000/v1}",
             ),
         ),
     ]
@@ -12,7 +12,7 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:http://localhost:8000/v1}
+      url: ${env.VLLM_URL:=http://localhost:8000/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
      api_token: ${env.VLLM_API_TOKEN:=fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
@@ -15,7 +15,7 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:http://localhost:8000/v1}
+      url: ${env.VLLM_URL:=http://localhost:8000/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
      api_token: ${env.VLLM_API_TOKEN:=fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
@@ -15,7 +15,7 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:http://localhost:8000/v1}
+      url: ${env.VLLM_URL:=http://localhost:8000/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
      api_token: ${env.VLLM_API_TOKEN:=fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
@@ -44,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="vllm-inference",
         provider_type="remote::vllm",
         config=VLLMInferenceAdapterConfig.sample_run_config(
-            url="${env.VLLM_URL:http://localhost:8000/v1}",
+            url="${env.VLLM_URL:=http://localhost:8000/v1}",
         ),
     )
     embedding_provider = Provider(
@@ -68,7 +68,7 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
-  - provider_id: ${env.ENABLE_SQLITE_VEC+sqlite-vec}
+  - provider_id: ${env.ENABLE_SQLITE_VEC:+sqlite-vec}
     provider_type: inline::sqlite-vec
     config:
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
@@ -175,7 +175,7 @@ def get_distribution_template() -> DistributionTemplate:
             config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
         ),
         Provider(
-            provider_id="${env.ENABLE_SQLITE_VEC+sqlite-vec}",
+            provider_id="${env.ENABLE_SQLITE_VEC:+sqlite-vec}",
             provider_type="inline::sqlite-vec",
             config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
         ),
@@ -29,6 +29,7 @@ mapfile -t py_dirs < <(
     -type f \
     -name "*.py" ! -name "__init__.py" \
     ! -path "*/.venv/*" \
+    ! -path "*/node_modules/*" \
     -exec dirname {} \; | sort -u
 )
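For readers who prefer Python, a rough equivalent of the updated find invocation above (the py_dirs helper is ours, not part of the script): it collects the unique directories containing non-__init__ Python files, now skipping node_modules trees as well as .venv.

from pathlib import Path


def py_dirs(root: str = ".") -> list[str]:
    """Unique directories holding *.py files (excluding __init__.py), sorted."""
    skip = {".venv", "node_modules"}
    dirs = {
        str(p.parent)
        for p in Path(root).rglob("*.py")
        if p.name != "__init__.py" and not (skip & set(p.parts))
    }
    return sorted(dirs)


print("\n".join(py_dirs()))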