update starter

This commit is contained in:
Hardik Shah 2025-07-09 17:40:21 -07:00
parent 7915551eee
commit 0369dd4191
44 changed files with 351 additions and 670 deletions

View file

@ -13,7 +13,7 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv
## Sample Configuration
```yaml
api_key: ${env.ANTHROPIC_API_KEY}
api_key: ${env.ANTHROPIC_API_KEY:=}
```

View file

@ -15,7 +15,7 @@ Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API fo
```yaml
openai_compat_api_base: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY}
api_key: ${env.CEREBRAS_API_KEY:=}
```

View file

@ -15,7 +15,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
```yaml
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY}
api_key: ${env.CEREBRAS_API_KEY:=}
```

View file

@ -14,8 +14,8 @@ Databricks inference provider for running models on Databricks' unified analytic
## Sample Configuration
```yaml
url: ${env.DATABRICKS_URL}
api_token: ${env.DATABRICKS_API_TOKEN}
url: ${env.DATABRICKS_URL:=}
api_token: ${env.DATABRICKS_API_TOKEN:=}
```

View file

@ -15,7 +15,7 @@ Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI A
```yaml
openai_compat_api_base: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY}
api_key: ${env.FIREWORKS_API_KEY:=}
```

View file

@ -15,7 +15,7 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire
```yaml
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY}
api_key: ${env.FIREWORKS_API_KEY:=}
```

View file

@ -13,7 +13,7 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser
## Sample Configuration
```yaml
api_key: ${env.GEMINI_API_KEY}
api_key: ${env.GEMINI_API_KEY:=}
```

View file

@ -15,7 +15,7 @@ Groq OpenAI-compatible provider for using Groq models with OpenAI API format.
```yaml
openai_compat_api_base: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY}
api_key: ${env.GROQ_API_KEY:=}
```

View file

@ -15,7 +15,7 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.
```yaml
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY}
api_key: ${env.GROQ_API_KEY:=}
```

View file

@ -15,7 +15,7 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format.
```yaml
openai_compat_api_base: https://api.llama.com/compat/v1/
api_key: ${env.LLAMA_API_KEY}
api_key: ${env.LLAMA_API_KEY:=}
```

View file

@ -13,7 +13,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
## Sample Configuration
```yaml
api_key: ${env.OPENAI_API_KEY}
api_key: ${env.OPENAI_API_KEY:=}
```

View file

@ -15,7 +15,7 @@ Passthrough inference provider for connecting to any external inference service
```yaml
url: ${env.PASSTHROUGH_URL}
api_key: ${env.PASSTHROUGH_API_KEY}
api_key: ${env.PASSTHROUGH_API_KEY:=}
```

View file

@ -15,7 +15,7 @@ SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API
```yaml
openai_compat_api_base: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY}
api_key: ${env.SAMBANOVA_API_KEY:=}
```

View file

@ -15,7 +15,7 @@ SambaNova inference provider for running models on SambaNova's dataflow architec
```yaml
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY}
api_key: ${env.SAMBANOVA_API_KEY:=}
```

View file

@ -15,7 +15,7 @@ Together AI OpenAI-compatible provider for using Together models with OpenAI API
```yaml
openai_compat_api_base: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY}
api_key: ${env.TOGETHER_API_KEY:=}
```

View file

@ -15,7 +15,7 @@ Together AI inference provider for open-source models and collaborative AI devel
```yaml
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY}
api_key: ${env.TOGETHER_API_KEY:=}
```

View file

@ -16,7 +16,7 @@ Remote vLLM inference provider for connecting to vLLM servers.
## Sample Configuration
```yaml
url: ${env.VLLM_URL}
url: ${env.VLLM_URL:=http://localhost:8000/v1}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}

View file

@ -15,7 +15,7 @@ SambaNova's safety provider for content moderation and safety filtering.
```yaml
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY}
api_key: ${env.SAMBANOVA_API_KEY:=}
```

View file

@ -24,7 +24,7 @@ Please refer to the inline provider documentation.
## Sample Configuration
```yaml
api_key: ${env.QDRANT_API_KEY}
api_key: ${env.QDRANT_API_KEY:=}
```

View file

@ -26,7 +26,7 @@ class AnthropicConfig(BaseModel):
)
@classmethod
def sample_run_config(cls, api_key: str = "${env.ANTHROPIC_API_KEY}", **kwargs) -> dict[str, Any]:
def sample_run_config(cls, api_key: str = "${env.ANTHROPIC_API_KEY:=}", **kwargs) -> dict[str, Any]:
return {
"api_key": api_key,
}

View file

@ -65,9 +65,10 @@ class CerebrasInferenceAdapter(
)
self.config = config
api_key = self.config.api_key.get_secret_value() if self.config.api_key else ""
self.client = AsyncCerebras(
base_url=self.config.base_url,
api_key=self.config.api_key.get_secret_value(),
api_key=api_key,
)
async def initialize(self) -> None:

View file

@ -26,7 +26,7 @@ class CerebrasImplConfig(BaseModel):
)
@classmethod
def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> dict[str, Any]:
def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY:=}", **kwargs) -> dict[str, Any]:
return {
"base_url": DEFAULT_BASE_URL,
"api_key": api_key,

View file

@ -31,7 +31,7 @@ class CerebrasCompatConfig(BaseModel):
)
@classmethod
def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> dict[str, Any]:
def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY:=}", **kwargs) -> dict[str, Any]:
return {
"openai_compat_api_base": "https://api.cerebras.ai/v1",
"api_key": api_key,

View file

@ -25,8 +25,8 @@ class DatabricksImplConfig(BaseModel):
@classmethod
def sample_run_config(
cls,
url: str = "${env.DATABRICKS_URL}",
api_token: str = "${env.DATABRICKS_API_TOKEN}",
url: str = "${env.DATABRICKS_URL:=}",
api_token: str = "${env.DATABRICKS_API_TOKEN:=}",
**kwargs: Any,
) -> dict[str, Any]:
return {

View file

@ -23,7 +23,7 @@ class FireworksImplConfig(BaseModel):
)
@classmethod
def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY}", **kwargs) -> dict[str, Any]:
def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]:
return {
"url": "https://api.fireworks.ai/inference/v1",
"api_key": api_key,

View file

@ -96,6 +96,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
def _get_client(self) -> Fireworks:
    """Return a Fireworks client authenticated with the resolved API key.

    The key is obtained from ``self._get_api_key()`` and passed straight to
    the ``Fireworks`` constructor.
    """
    # Security: the API key is a secret. Do not print or log it -- a debug
    # print here would leak the credential into stdout and any log capture.
    fireworks_api_key = self._get_api_key()
    return Fireworks(api_key=fireworks_api_key)
def _get_openai_client(self) -> AsyncOpenAI:

View file

@ -31,7 +31,7 @@ class FireworksCompatConfig(BaseModel):
)
@classmethod
def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY}", **kwargs) -> dict[str, Any]:
def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]:
return {
"openai_compat_api_base": "https://api.fireworks.ai/inference/v1",
"api_key": api_key,

View file

@ -26,7 +26,7 @@ class GeminiConfig(BaseModel):
)
@classmethod
def sample_run_config(cls, api_key: str = "${env.GEMINI_API_KEY}", **kwargs) -> dict[str, Any]:
def sample_run_config(cls, api_key: str = "${env.GEMINI_API_KEY:=}", **kwargs) -> dict[str, Any]:
return {
"api_key": api_key,
}

View file

@ -32,7 +32,7 @@ class GroqConfig(BaseModel):
)
@classmethod
def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY}", **kwargs) -> dict[str, Any]:
def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]:
return {
"url": "https://api.groq.com",
"api_key": api_key,

View file

@ -31,7 +31,7 @@ class GroqCompatConfig(BaseModel):
)
@classmethod
def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY}", **kwargs) -> dict[str, Any]:
def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]:
return {
"openai_compat_api_base": "https://api.groq.com/openai/v1",
"api_key": api_key,

View file

@ -31,7 +31,7 @@ class LlamaCompatConfig(BaseModel):
)
@classmethod
def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]:
def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY:=}", **kwargs) -> dict[str, Any]:
return {
"openai_compat_api_base": "https://api.llama.com/compat/v1/",
"api_key": api_key,

View file

@ -26,7 +26,7 @@ class OpenAIConfig(BaseModel):
)
@classmethod
def sample_run_config(cls, api_key: str = "${env.OPENAI_API_KEY}", **kwargs) -> dict[str, Any]:
def sample_run_config(cls, api_key: str = "${env.OPENAI_API_KEY:=}", **kwargs) -> dict[str, Any]:
return {
"api_key": api_key,
}

View file

@ -25,7 +25,7 @@ class PassthroughImplConfig(BaseModel):
@classmethod
def sample_run_config(
cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY:=}", **kwargs
) -> dict[str, Any]:
return {
"url": url,

View file

@ -30,7 +30,7 @@ class SambaNovaImplConfig(BaseModel):
)
@classmethod
def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> dict[str, Any]:
def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
return {
"url": "https://api.sambanova.ai/v1",
"api_key": api_key,

View file

@ -31,7 +31,7 @@ class SambaNovaCompatConfig(BaseModel):
)
@classmethod
def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> dict[str, Any]:
def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
return {
"openai_compat_api_base": "https://api.sambanova.ai/v1",
"api_key": api_key,

View file

@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel):
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
return {
"url": "https://api.together.xyz/v1",
"api_key": "${env.TOGETHER_API_KEY}",
"api_key": "${env.TOGETHER_API_KEY:=}",
}

View file

@ -31,7 +31,7 @@ class TogetherCompatConfig(BaseModel):
)
@classmethod
def sample_run_config(cls, api_key: str = "${env.TOGETHER_API_KEY}", **kwargs) -> dict[str, Any]:
def sample_run_config(cls, api_key: str = "${env.TOGETHER_API_KEY:=}", **kwargs) -> dict[str, Any]:
return {
"openai_compat_api_base": "https://api.together.xyz/v1",
"api_key": api_key,

View file

@ -46,7 +46,7 @@ class VLLMInferenceAdapterConfig(BaseModel):
@classmethod
def sample_run_config(
cls,
url: str = "${env.VLLM_URL}",
url: str = "${env.VLLM_URL:=http://localhost:8000/v1}",
**kwargs,
):
return {

View file

@ -30,7 +30,7 @@ class SambaNovaSafetyConfig(BaseModel):
)
@classmethod
def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> dict[str, Any]:
def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
return {
"url": "https://api.sambanova.ai/v1",
"api_key": api_key,

View file

@ -27,5 +27,5 @@ class QdrantVectorIOConfig(BaseModel):
@classmethod
def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
return {
"api_key": "${env.QDRANT_API_KEY}",
"api_key": "${env.QDRANT_API_KEY:=}",
}

View file

@ -33,7 +33,7 @@ providers:
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY}
api_key: ${env.TOGETHER_API_KEY:=}
vector_io:
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec

View file

@ -6,27 +6,13 @@ distribution_spec:
- remote::cerebras
- remote::ollama
- remote::vllm
- remote::tgi
- remote::hf::serverless
- remote::hf::endpoint
- remote::fireworks
- remote::together
- remote::bedrock
- remote::databricks
- remote::nvidia
- remote::runpod
- remote::openai
- remote::anthropic
- remote::gemini
- remote::groq
- remote::fireworks-openai-compat
- remote::llama-openai-compat
- remote::together-openai-compat
- remote::groq-openai-compat
- remote::sambanova-openai-compat
- remote::cerebras-openai-compat
- remote::sambanova
- remote::passthrough
- inline::sentence-transformers
vector_io:
- inline::faiss

File diff suppressed because it is too large Load diff

View file

@ -113,6 +113,19 @@ from llama_stack.templates.template import (
get_model_registry,
)
# Allow-list of remote inference adapter types. It is checked against
# `provider.adapter.adapter_type` when the remote inference providers are
# selected; adapters not named here are left out of the resulting list.
REMOTE_INFERENCE_PROVIDERS_FOR_STARTER: set[str] = {
    "anthropic", "cerebras", "fireworks", "gemini", "groq",
    "ollama", "openai", "sambanova", "together", "vllm",
}
def _get_model_entries_for_provider(provider_type: str) -> list[ProviderModelEntry]:
"""Get model entries for a specific provider type."""
@ -207,41 +220,27 @@ def get_remote_inference_providers() -> tuple[list[Provider], dict[str, list[Pro
remote_providers = [
provider
for provider in all_providers
# TODO: re-add once the Python 3.13 issue is fixed
# discussion: https://github.com/meta-llama/llama-stack/pull/2327#discussion_r2156883828
if hasattr(provider, "adapter") and provider.adapter.adapter_type != "watsonx"
if hasattr(provider, "adapter") and provider.adapter.adapter_type in REMOTE_INFERENCE_PROVIDERS_FOR_STARTER
]
providers = []
inference_providers = []
available_models = {}
for provider_spec in remote_providers:
provider_type = provider_spec.adapter.adapter_type
# Build the environment variable name for enabling this provider
env_var = f"ENABLE_{provider_type.upper().replace('-', '_').replace('::', '_')}"
model_entries = _get_model_entries_for_provider(provider_type)
config = _get_config_for_provider(provider_spec)
providers.append(
(
f"${{env.{env_var}:=__disabled__}}",
provider_type,
model_entries,
config,
)
)
available_models[f"${{env.{env_var}:=__disabled__}}"] = model_entries
inference_providers = []
for provider_id, provider_type, model_entries, config in providers:
inference_providers.append(
Provider(
provider_id=provider_id,
provider_id=provider_type,
provider_type=f"remote::{provider_type}",
config=config,
)
)
available_models[provider_id] = model_entries
available_models[provider_type] = model_entries
return inference_providers, available_models