Merge branch 'main' into chore/standard-unsupported-model-err-msg-2517

Rohan Awhad 2025-06-27 13:55:34 -04:00
commit ed4a776cc5
29 changed files with 402 additions and 154 deletions

View file

@@ -154,12 +154,6 @@ get_python_cmd() {
fi
}
# Add other required item commands generic to all containers
add_to_container << EOF
# Allows running as non-root user
RUN mkdir -p /.llama/providers.d /.cache
EOF
if [ -n "$run_config" ]; then
# Copy the run config to the build context since it's an absolute path
cp "$run_config" "$BUILD_CONTEXT_DIR/run.yaml"
@@ -175,7 +169,7 @@ if [ -n "$run_config" ]; then
echo "Copying external providers directory: $external_providers_dir"
cp -r "$external_providers_dir" "$BUILD_CONTEXT_DIR/providers.d"
add_to_container << EOF
COPY providers.d /.llama/providers.d
COPY --chmod=g+w providers.d /.llama/providers.d
EOF
fi
@@ -268,7 +262,7 @@ fi
# Add other required item commands generic to all containers
add_to_container << EOF
RUN chmod -R g+rw /app /.llama /.cache
RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache
EOF
printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"

View file

@@ -146,7 +146,9 @@ in the runtime configuration to help route to the correct provider.""",
class Provider(BaseModel):
    provider_id: str
    # provider_id of None means that the provider is not enabled - this happens
    # when the provider is enabled via a conditional environment variable
    provider_id: str | None
    provider_type: str
    config: dict[str, Any]
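Context for the `str | None` change: when a `provider_id` is written as a conditional such as `${env.ENABLE_SQLITE_VEC:+sqlite-vec}`, it resolves to `None` when the variable is unset, and the hunks below skip such providers during registry listing and instantiation. A minimal sketch of that contract (illustrative provider values, not from a real run config):

```python
from typing import Any

from pydantic import BaseModel


class Provider(BaseModel):
    # None means the provider was disabled via a conditional env var.
    provider_id: str | None
    provider_type: str
    config: dict[str, Any]


providers = [
    Provider(provider_id="faiss", provider_type="inline::faiss", config={}),
    # ${env.ENABLE_SQLITE_VEC:+sqlite-vec} with the variable unset:
    Provider(provider_id=None, provider_type="inline::sqlite-vec", config={}),
]

# Same skip pattern as the hunks below: disabled providers never surface.
enabled = [p for p in providers if p.provider_id is not None]
assert [p.provider_id for p in enabled] == ["faiss"]
```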

View file

@@ -48,6 +48,9 @@ class ProviderImpl(Providers):
        ret = []
        for api, providers in safe_config.providers.items():
            for p in providers:
                # Skip providers that are not enabled
                if p.provider_id is None:
                    continue
                ret.append(
                    ProviderInfo(
                        api=api,

View file

@@ -255,6 +255,10 @@ async def instantiate_providers(
    impls: dict[Api, Any] = {}
    inner_impls_by_provider_id: dict[str, dict[str, Any]] = {f"inner-{x.value}": {} for x in router_apis}
    for api_str, provider in sorted_providers:
        # Skip providers that are not enabled
        if provider.provider_id is None:
            continue
        deps = {a: impls[a] for a in provider.spec.api_dependencies}
        for a in provider.spec.optional_api_dependencies:
            if a in impls:

View file

@@ -57,9 +57,8 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
        provider_dataset_id = dataset_id
        # infer provider from source
        if metadata:
            if metadata.get("provider_id"):
                provider_id = metadata.get("provider_id")  # pass through from nvidia datasetio
        if metadata and metadata.get("provider_id"):
            provider_id = metadata.get("provider_id")  # pass through from nvidia datasetio
        elif source.type == DatasetType.rows.value:
            provider_id = "localfs"
        elif source.type == DatasetType.uri.value:

View file

@@ -14,6 +14,8 @@ from opentelemetry.sdk.trace import SpanProcessor
from opentelemetry.trace import Span
from opentelemetry.trace.span import format_span_id, format_trace_id

from llama_stack.providers.utils.telemetry.tracing import LOCAL_ROOT_SPAN_MARKER


class SQLiteSpanProcessor(SpanProcessor):
    def __init__(self, conn_string):
@@ -124,7 +126,7 @@ class SQLiteSpanProcessor(SpanProcessor):
                (
                    trace_id,
                    service_name,
                    (span_id if span.attributes.get("__root_span__") == "true" else None),
                    (span_id if span.attributes.get(LOCAL_ROOT_SPAN_MARKER) else None),
                    datetime.fromtimestamp(span.start_time / 1e9, UTC).isoformat(),
                    datetime.fromtimestamp(span.end_time / 1e9, UTC).isoformat(),
                ),

View file

@@ -137,6 +137,9 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
    async def initialize(self) -> None:
        if isinstance(self.config, RemoteChromaVectorIOConfig):
            if not self.config.url:
                raise ValueError("URL is a required parameter for the remote Chroma provider's config")

            log.info(f"Connecting to Chroma server at: {self.config.url}")
            url = self.config.url.rstrip("/")
            parsed = urlparse(url)

View file

@@ -10,7 +10,7 @@ from pydantic import BaseModel
class ChromaVectorIOConfig(BaseModel):
    url: str
    url: str | None

    @classmethod
    def sample_run_config(cls, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> dict[str, Any]:

View file

@@ -13,11 +13,11 @@ from llama_stack.schema_utils import json_schema_type
@json_schema_type
class PGVectorVectorIOConfig(BaseModel):
    host: str = Field(default="localhost")
    port: int = Field(default=5432)
    db: str = Field(default="postgres")
    user: str = Field(default="postgres")
    password: str = Field(default="mysecretpassword")
    host: str | None = Field(default="localhost")
    port: int | None = Field(default=5432)
    db: str | None = Field(default="postgres")
    user: str | None = Field(default="postgres")
    password: str | None = Field(default="mysecretpassword")

    @classmethod
    def sample_run_config(

View file

@@ -15,7 +15,21 @@ from pydantic import BaseModel, TypeAdapter
from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_io import (
    Chunk,
    QueryChunksResponse,
    SearchRankingOptions,
    VectorIO,
    VectorStoreChunkingStrategy,
    VectorStoreDeleteResponse,
    VectorStoreFileContentsResponse,
    VectorStoreFileObject,
    VectorStoreFileStatus,
    VectorStoreListFilesResponse,
    VectorStoreListResponse,
    VectorStoreObject,
    VectorStoreSearchResponsePage,
)
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.utils.memory.vector_store import (
    EmbeddingIndex,
@@ -222,3 +236,108 @@ class PGVectorVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
        index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn)
        self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api)
        return self.cache[vector_db_id]

    async def openai_create_vector_store(
        self,
        name: str,
        file_ids: list[str] | None = None,
        expires_after: dict[str, Any] | None = None,
        chunking_strategy: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
        embedding_model: str | None = None,
        embedding_dimension: int | None = 384,
        provider_id: str | None = None,
        provider_vector_db_id: str | None = None,
    ) -> VectorStoreObject:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")

    async def openai_list_vector_stores(
        self,
        limit: int | None = 20,
        order: str | None = "desc",
        after: str | None = None,
        before: str | None = None,
    ) -> VectorStoreListResponse:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")

    async def openai_retrieve_vector_store(
        self,
        vector_store_id: str,
    ) -> VectorStoreObject:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")

    async def openai_update_vector_store(
        self,
        vector_store_id: str,
        name: str | None = None,
        expires_after: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> VectorStoreObject:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")

    async def openai_delete_vector_store(
        self,
        vector_store_id: str,
    ) -> VectorStoreDeleteResponse:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")

    async def openai_search_vector_store(
        self,
        vector_store_id: str,
        query: str | list[str],
        filters: dict[str, Any] | None = None,
        max_num_results: int | None = 10,
        ranking_options: SearchRankingOptions | None = None,
        rewrite_query: bool | None = False,
        search_mode: str | None = "vector",
    ) -> VectorStoreSearchResponsePage:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")

    async def openai_attach_file_to_vector_store(
        self,
        vector_store_id: str,
        file_id: str,
        attributes: dict[str, Any] | None = None,
        chunking_strategy: VectorStoreChunkingStrategy | None = None,
    ) -> VectorStoreFileObject:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")

    async def openai_list_files_in_vector_store(
        self,
        vector_store_id: str,
        limit: int | None = 20,
        order: str | None = "desc",
        after: str | None = None,
        before: str | None = None,
        filter: VectorStoreFileStatus | None = None,
    ) -> VectorStoreListFilesResponse:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")

    async def openai_retrieve_vector_store_file(
        self,
        vector_store_id: str,
        file_id: str,
    ) -> VectorStoreFileObject:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")

    async def openai_retrieve_vector_store_file_contents(
        self,
        vector_store_id: str,
        file_id: str,
    ) -> VectorStoreFileContentsResponse:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")

    async def openai_update_vector_store_file(
        self,
        vector_store_id: str,
        file_id: str,
        attributes: dict[str, Any] | None = None,
    ) -> VectorStoreFileObject:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")

    async def openai_delete_vector_store_file(
        self,
        vector_store_id: str,
        file_id: str,
    ) -> VectorStoreFileObject:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
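These stubs keep `PGVectorVectorIOAdapter` structurally complete against the expanded `VectorIO` surface while making non-support explicit rather than silent. A hedged sketch of what a caller can rely on (hypothetical `adapter` instance and store id):

```python
async def supports_openai_vector_stores(adapter) -> bool:
    # Every OpenAI vector-store method on PGVector currently raises
    # NotImplementedError, so callers can feature-detect up front.
    try:
        await adapter.openai_retrieve_vector_store("vs_demo")  # hypothetical id
    except NotImplementedError:
        return False
    return True
```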

View file

@@ -35,6 +35,9 @@ INVALID_SPAN_ID = 0x0000000000000000
INVALID_TRACE_ID = 0x00000000000000000000000000000000
ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
# The logical root span may not be visible to this process if a parent context
# is passed in. The local root span is the first local span in a trace.
LOCAL_ROOT_SPAN_MARKER = "__local_root_span__"
def trace_id_to_str(trace_id: int) -> str:
@@ -180,7 +183,13 @@ async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceContext:
    trace_id = generate_trace_id()
    context = TraceContext(BACKGROUND_LOGGER, trace_id)
    attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | (attributes or {})
    # Mark this span as the root for the trace for now. The processing of
    # the traceparent context, if supplied, comes later and will result in
    # the ROOT_SPAN_MARKERS being removed. Also mark this as the 'local'
    # root, i.e. the root of the spans originating in this process, as this
    # is needed to ensure that we insert this 'local' root span's id into
    # the trace record in the sqlite store.
    attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | {LOCAL_ROOT_SPAN_MARKER: True} | (attributes or {})
    context.push_span(name, attributes)
    CURRENT_TRACE_CONTEXT.set(context)
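A minimal sketch of the intended interplay, with plain dicts standing in for real `Span` attributes (assumed behavior, not the stack's code path): the first in-process span keeps its local-root marker even after traceparent processing strips the logical root markers, so the SQLiteSpanProcessor change earlier in this commit can still record its id.

```python
ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
LOCAL_ROOT_SPAN_MARKER = "__local_root_span__"

# start_trace: mark the first local span as both logical and local root.
attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | {LOCAL_ROOT_SPAN_MARKER: True}

# Later traceparent processing removes the logical root markers...
for marker in ROOT_SPAN_MARKERS:
    attributes.pop(marker, None)

# ...but the span processor can still pick the local root span's id.
span_id = "abc123"  # hypothetical
root_span_id = span_id if attributes.get(LOCAL_ROOT_SPAN_MARKER) else None
assert root_span_id == "abc123"
```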

View file

@@ -24,7 +24,7 @@ providers:
  - provider_id: ollama
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_URL:http://localhost:11434}
      url: ${env.OLLAMA_URL:=http://localhost:11434}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@@ -32,7 +32,7 @@ providers:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
@@ -40,7 +40,7 @@ providers:
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:}
      openai_api_key: ${env.OPENAI_API_KEY:+}
  datasetio:
  - provider_id: localfs
    provider_type: inline::localfs
@@ -48,14 +48,14 @@ providers:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/localfs_datasetio.db
        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/localfs_datasetio.db
  - provider_id: huggingface
    provider_type: remote::huggingface
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/huggingface}/huggingface_datasetio.db
        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/huggingface}/huggingface_datasetio.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@@ -74,7 +74,7 @@ providers:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/agents_store.db
        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/agents_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
@@ -86,19 +86,19 @@ providers:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/faiss_store.db
        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/faiss_store.db
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:}
      api_key: ${env.BRAVE_SEARCH_API_KEY:+}
      max_results: 3
metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/registry.db
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/registry.db
models: []
shields: []
vector_dbs: []
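The recurring change in these run configs swaps the old `${env.VAR:default}` / `${env.VAR:}` spellings for the bash-style `${env.VAR:=default}` (use the default when unset or empty) and `${env.VAR:+value}` (substitute only when set and non-empty). A hedged Python sketch of those two expansions (a hypothetical resolver for illustration, not the stack's actual parser):

```python
import os
import re


def expand(expr: str) -> str | None:
    # ${env.VAR:=default} -> default when VAR is unset or empty
    # ${env.VAR:+value}   -> value when VAR is set, otherwise None
    m = re.fullmatch(r"\$\{env\.(\w+):([=+])(.*)\}", expr)
    if not m:
        return expr
    var, op, rest = m.groups()
    current = os.environ.get(var)
    if op == "=":
        return current if current else rest
    return rest if current else None


os.environ.pop("OLLAMA_URL", None)
os.environ.pop("OPENAI_API_KEY", None)
assert expand("${env.OLLAMA_URL:=http://localhost:11434}") == "http://localhost:11434"
assert expand("${env.OPENAI_API_KEY:+sk-test}") is None  # hypothetical key value
```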

View file

@@ -46,7 +46,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="inline::meta-reference",
config=MetaReferenceInferenceConfig.sample_run_config(
model="${env.INFERENCE_MODEL}",
checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:=null}",
),
)
embedding_provider = Provider(
@@ -112,7 +112,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="inline::meta-reference",
config=MetaReferenceInferenceConfig.sample_run_config(
model="${env.SAFETY_MODEL}",
checkpoint_dir="${env.SAFETY_CHECKPOINT_DIR:null}",
checkpoint_dir="${env.SAFETY_CHECKPOINT_DIR:=null}",
),
),
],

View file

@@ -16,7 +16,7 @@ providers:
    provider_type: inline::meta-reference
    config:
      model: ${env.INFERENCE_MODEL}
      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null}
      quantization:
        type: ${env.QUANTIZATION_TYPE:=bf16}
      model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
@@ -29,7 +29,7 @@ providers:
    provider_type: inline::meta-reference
    config:
      model: ${env.SAFETY_MODEL}
      checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
      checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:=null}
      quantization:
        type: ${env.QUANTIZATION_TYPE:=bf16}
      model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}

View file

@@ -16,7 +16,7 @@ providers:
    provider_type: inline::meta-reference
    config:
      model: ${env.INFERENCE_MODEL}
      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null}
      quantization:
        type: ${env.QUANTIZATION_TYPE:=bf16}
      model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}

View file

@@ -46,7 +46,7 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
                    model_type=ModelType.llm,
                )
            ],
            OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:}"),
            OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:+}"),
        ),
        (
            "anthropic",
@@ -56,7 +56,7 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
                    model_type=ModelType.llm,
                )
            ],
            AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:}"),
            AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:+}"),
        ),
        (
            "gemini",
@@ -66,17 +66,17 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
                    model_type=ModelType.llm,
                )
            ],
            GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:}"),
            GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:+}"),
        ),
        (
            "groq",
            [],
            GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"),
            GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:+}"),
        ),
        (
            "together",
            [],
            TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"),
            TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:+}"),
        ),
    ]
    inference_providers = []

View file

@@ -15,20 +15,20 @@ providers:
  - provider_id: openai
    provider_type: remote::openai
    config:
      api_key: ${env.OPENAI_API_KEY:}
      api_key: ${env.OPENAI_API_KEY:+}
  - provider_id: anthropic
    provider_type: remote::anthropic
    config:
      api_key: ${env.ANTHROPIC_API_KEY:}
      api_key: ${env.ANTHROPIC_API_KEY:+}
  - provider_id: gemini
    provider_type: remote::gemini
    config:
      api_key: ${env.GEMINI_API_KEY:}
      api_key: ${env.GEMINI_API_KEY:+}
  - provider_id: groq
    provider_type: remote::groq
    config:
      url: https://api.groq.com
      api_key: ${env.GROQ_API_KEY:}
      api_key: ${env.GROQ_API_KEY:+}
  - provider_id: together
    provider_type: remote::together
    config:

View file

@@ -29,7 +29,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_id="vllm-inference",
provider_type="remote::vllm",
config=VLLMInferenceAdapterConfig.sample_run_config(
url="${env.VLLM_URL:http://localhost:8000/v1}",
url="${env.VLLM_URL:=http://localhost:8000/v1}",
),
),
]

View file

@@ -12,7 +12,7 @@ providers:
  - provider_id: vllm-inference
    provider_type: remote::vllm
    config:
      url: ${env.VLLM_URL:http://localhost:8000/v1}
      url: ${env.VLLM_URL:=http://localhost:8000/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
      api_token: ${env.VLLM_API_TOKEN:=fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:=true}

View file

@@ -15,7 +15,7 @@ providers:
  - provider_id: vllm-inference
    provider_type: remote::vllm
    config:
      url: ${env.VLLM_URL:http://localhost:8000/v1}
      url: ${env.VLLM_URL:=http://localhost:8000/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
      api_token: ${env.VLLM_API_TOKEN:=fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:=true}

View file

@@ -15,7 +15,7 @@ providers:
  - provider_id: vllm-inference
    provider_type: remote::vllm
    config:
      url: ${env.VLLM_URL:http://localhost:8000/v1}
      url: ${env.VLLM_URL:=http://localhost:8000/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
      api_token: ${env.VLLM_API_TOKEN:=fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:=true}

View file

@@ -44,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_id="vllm-inference",
provider_type="remote::vllm",
config=VLLMInferenceAdapterConfig.sample_run_config(
url="${env.VLLM_URL:http://localhost:8000/v1}",
url="${env.VLLM_URL:=http://localhost:8000/v1}",
),
)
embedding_provider = Provider(

View file

@@ -68,7 +68,7 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
  - provider_id: ${env.ENABLE_SQLITE_VEC+sqlite-vec}
  - provider_id: ${env.ENABLE_SQLITE_VEC:+sqlite-vec}
    provider_type: inline::sqlite-vec
    config:
      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db

View file

@@ -175,7 +175,7 @@ def get_distribution_template() -> DistributionTemplate:
            config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
        ),
        Provider(
            provider_id="${env.ENABLE_SQLITE_VEC+sqlite-vec}",
            provider_id="${env.ENABLE_SQLITE_VEC:+sqlite-vec}",
            provider_type="inline::sqlite-vec",
            config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
        ),