mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 07:14:20 +00:00
update starter
This commit is contained in:
parent
7915551eee
commit
0369dd4191
44 changed files with 351 additions and 670 deletions
|
@ -13,7 +13,7 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv
|
|||
## Sample Configuration
|
||||
|
||||
```yaml
|
||||
api_key: ${env.ANTHROPIC_API_KEY}
|
||||
api_key: ${env.ANTHROPIC_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API fo
|
|||
|
||||
```yaml
|
||||
openai_compat_api_base: https://api.cerebras.ai/v1
|
||||
api_key: ${env.CEREBRAS_API_KEY}
|
||||
api_key: ${env.CEREBRAS_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
|
|||
|
||||
```yaml
|
||||
base_url: https://api.cerebras.ai
|
||||
api_key: ${env.CEREBRAS_API_KEY}
|
||||
api_key: ${env.CEREBRAS_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -14,8 +14,8 @@ Databricks inference provider for running models on Databricks' unified analytic
|
|||
## Sample Configuration
|
||||
|
||||
```yaml
|
||||
url: ${env.DATABRICKS_URL}
|
||||
api_token: ${env.DATABRICKS_API_TOKEN}
|
||||
url: ${env.DATABRICKS_URL:=}
|
||||
api_token: ${env.DATABRICKS_API_TOKEN:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI A
|
|||
|
||||
```yaml
|
||||
openai_compat_api_base: https://api.fireworks.ai/inference/v1
|
||||
api_key: ${env.FIREWORKS_API_KEY}
|
||||
api_key: ${env.FIREWORKS_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire
|
|||
|
||||
```yaml
|
||||
url: https://api.fireworks.ai/inference/v1
|
||||
api_key: ${env.FIREWORKS_API_KEY}
|
||||
api_key: ${env.FIREWORKS_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser
|
|||
## Sample Configuration
|
||||
|
||||
```yaml
|
||||
api_key: ${env.GEMINI_API_KEY}
|
||||
api_key: ${env.GEMINI_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ Groq OpenAI-compatible provider for using Groq models with OpenAI API format.
|
|||
|
||||
```yaml
|
||||
openai_compat_api_base: https://api.groq.com/openai/v1
|
||||
api_key: ${env.GROQ_API_KEY}
|
||||
api_key: ${env.GROQ_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.
|
|||
|
||||
```yaml
|
||||
url: https://api.groq.com
|
||||
api_key: ${env.GROQ_API_KEY}
|
||||
api_key: ${env.GROQ_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format.
|
|||
|
||||
```yaml
|
||||
openai_compat_api_base: https://api.llama.com/compat/v1/
|
||||
api_key: ${env.LLAMA_API_KEY}
|
||||
api_key: ${env.LLAMA_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
|
|||
## Sample Configuration
|
||||
|
||||
```yaml
|
||||
api_key: ${env.OPENAI_API_KEY}
|
||||
api_key: ${env.OPENAI_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ Passthrough inference provider for connecting to any external inference service
|
|||
|
||||
```yaml
|
||||
url: ${env.PASSTHROUGH_URL}
|
||||
api_key: ${env.PASSTHROUGH_API_KEY}
|
||||
api_key: ${env.PASSTHROUGH_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API
|
|||
|
||||
```yaml
|
||||
openai_compat_api_base: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY}
|
||||
api_key: ${env.SAMBANOVA_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ SambaNova inference provider for running models on SambaNova's dataflow architec
|
|||
|
||||
```yaml
|
||||
url: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY}
|
||||
api_key: ${env.SAMBANOVA_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ Together AI OpenAI-compatible provider for using Together models with OpenAI API
|
|||
|
||||
```yaml
|
||||
openai_compat_api_base: https://api.together.xyz/v1
|
||||
api_key: ${env.TOGETHER_API_KEY}
|
||||
api_key: ${env.TOGETHER_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ Together AI inference provider for open-source models and collaborative AI devel
|
|||
|
||||
```yaml
|
||||
url: https://api.together.xyz/v1
|
||||
api_key: ${env.TOGETHER_API_KEY}
|
||||
api_key: ${env.TOGETHER_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ Remote vLLM inference provider for connecting to vLLM servers.
|
|||
## Sample Configuration
|
||||
|
||||
```yaml
|
||||
url: ${env.VLLM_URL}
|
||||
url: ${env.VLLM_URL:=http://localhost:8000/v1}
|
||||
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
||||
api_token: ${env.VLLM_API_TOKEN:=fake}
|
||||
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
||||
|
|
|
@ -15,7 +15,7 @@ SambaNova's safety provider for content moderation and safety filtering.
|
|||
|
||||
```yaml
|
||||
url: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY}
|
||||
api_key: ${env.SAMBANOVA_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ Please refer to the inline provider documentation.
|
|||
## Sample Configuration
|
||||
|
||||
```yaml
|
||||
api_key: ${env.QDRANT_API_KEY}
|
||||
api_key: ${env.QDRANT_API_KEY:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@ class AnthropicConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, api_key: str = "${env.ANTHROPIC_API_KEY}", **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(cls, api_key: str = "${env.ANTHROPIC_API_KEY:=}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"api_key": api_key,
|
||||
}
|
||||
|
|
|
@ -65,9 +65,10 @@ class CerebrasInferenceAdapter(
|
|||
)
|
||||
self.config = config
|
||||
|
||||
api_key = self.config.api_key.get_secret_value() if self.config.api_key else ""
|
||||
self.client = AsyncCerebras(
|
||||
base_url=self.config.base_url,
|
||||
api_key=self.config.api_key.get_secret_value(),
|
||||
api_key=api_key,
|
||||
)
|
||||
|
||||
async def initialize(self) -> None:
|
||||
|
|
|
@ -26,7 +26,7 @@ class CerebrasImplConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY:=}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"base_url": DEFAULT_BASE_URL,
|
||||
"api_key": api_key,
|
||||
|
|
|
@ -31,7 +31,7 @@ class CerebrasCompatConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY:=}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"openai_compat_api_base": "https://api.cerebras.ai/v1",
|
||||
"api_key": api_key,
|
||||
|
|
|
@ -25,8 +25,8 @@ class DatabricksImplConfig(BaseModel):
|
|||
@classmethod
|
||||
def sample_run_config(
|
||||
cls,
|
||||
url: str = "${env.DATABRICKS_URL}",
|
||||
api_token: str = "${env.DATABRICKS_API_TOKEN}",
|
||||
url: str = "${env.DATABRICKS_URL:=}",
|
||||
api_token: str = "${env.DATABRICKS_API_TOKEN:=}",
|
||||
**kwargs: Any,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
|
|
|
@ -23,7 +23,7 @@ class FireworksImplConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY}", **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"url": "https://api.fireworks.ai/inference/v1",
|
||||
"api_key": api_key,
|
||||
|
|
|
@ -96,6 +96,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
|
|||
|
||||
def _get_client(self) -> Fireworks:
|
||||
fireworks_api_key = self._get_api_key()
|
||||
print(f">>>>>> fireworks_api_key: {fireworks_api_key} <<<<<")
|
||||
return Fireworks(api_key=fireworks_api_key)
|
||||
|
||||
def _get_openai_client(self) -> AsyncOpenAI:
|
||||
|
|
|
@ -31,7 +31,7 @@ class FireworksCompatConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY}", **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"openai_compat_api_base": "https://api.fireworks.ai/inference/v1",
|
||||
"api_key": api_key,
|
||||
|
|
|
@ -26,7 +26,7 @@ class GeminiConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, api_key: str = "${env.GEMINI_API_KEY}", **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(cls, api_key: str = "${env.GEMINI_API_KEY:=}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"api_key": api_key,
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@ class GroqConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY}", **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"url": "https://api.groq.com",
|
||||
"api_key": api_key,
|
||||
|
|
|
@ -31,7 +31,7 @@ class GroqCompatConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY}", **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"openai_compat_api_base": "https://api.groq.com/openai/v1",
|
||||
"api_key": api_key,
|
||||
|
|
|
@ -31,7 +31,7 @@ class LlamaCompatConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY:=}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"openai_compat_api_base": "https://api.llama.com/compat/v1/",
|
||||
"api_key": api_key,
|
||||
|
|
|
@ -26,7 +26,7 @@ class OpenAIConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, api_key: str = "${env.OPENAI_API_KEY}", **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(cls, api_key: str = "${env.OPENAI_API_KEY:=}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"api_key": api_key,
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ class PassthroughImplConfig(BaseModel):
|
|||
|
||||
@classmethod
|
||||
def sample_run_config(
|
||||
cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
|
||||
cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY:=}", **kwargs
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"url": url,
|
||||
|
|
|
@ -30,7 +30,7 @@ class SambaNovaImplConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"url": "https://api.sambanova.ai/v1",
|
||||
"api_key": api_key,
|
||||
|
|
|
@ -31,7 +31,7 @@ class SambaNovaCompatConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"openai_compat_api_base": "https://api.sambanova.ai/v1",
|
||||
"api_key": api_key,
|
||||
|
|
|
@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel):
|
|||
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"url": "https://api.together.xyz/v1",
|
||||
"api_key": "${env.TOGETHER_API_KEY}",
|
||||
"api_key": "${env.TOGETHER_API_KEY:=}",
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ class TogetherCompatConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, api_key: str = "${env.TOGETHER_API_KEY}", **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(cls, api_key: str = "${env.TOGETHER_API_KEY:=}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"openai_compat_api_base": "https://api.together.xyz/v1",
|
||||
"api_key": api_key,
|
||||
|
|
|
@ -46,7 +46,7 @@ class VLLMInferenceAdapterConfig(BaseModel):
|
|||
@classmethod
|
||||
def sample_run_config(
|
||||
cls,
|
||||
url: str = "${env.VLLM_URL}",
|
||||
url: str = "${env.VLLM_URL:=http://localhost:8000/v1}",
|
||||
**kwargs,
|
||||
):
|
||||
return {
|
||||
|
|
|
@ -30,7 +30,7 @@ class SambaNovaSafetyConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"url": "https://api.sambanova.ai/v1",
|
||||
"api_key": api_key,
|
||||
|
|
|
@ -27,5 +27,5 @@ class QdrantVectorIOConfig(BaseModel):
|
|||
@classmethod
|
||||
def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
|
||||
return {
|
||||
"api_key": "${env.QDRANT_API_KEY}",
|
||||
"api_key": "${env.QDRANT_API_KEY:=}",
|
||||
}
|
||||
|
|
|
@ -33,7 +33,7 @@ providers:
|
|||
provider_type: remote::together
|
||||
config:
|
||||
url: https://api.together.xyz/v1
|
||||
api_key: ${env.TOGETHER_API_KEY}
|
||||
api_key: ${env.TOGETHER_API_KEY:=}
|
||||
vector_io:
|
||||
- provider_id: sqlite-vec
|
||||
provider_type: inline::sqlite-vec
|
||||
|
|
|
@ -6,27 +6,13 @@ distribution_spec:
|
|||
- remote::cerebras
|
||||
- remote::ollama
|
||||
- remote::vllm
|
||||
- remote::tgi
|
||||
- remote::hf::serverless
|
||||
- remote::hf::endpoint
|
||||
- remote::fireworks
|
||||
- remote::together
|
||||
- remote::bedrock
|
||||
- remote::databricks
|
||||
- remote::nvidia
|
||||
- remote::runpod
|
||||
- remote::openai
|
||||
- remote::anthropic
|
||||
- remote::gemini
|
||||
- remote::groq
|
||||
- remote::fireworks-openai-compat
|
||||
- remote::llama-openai-compat
|
||||
- remote::together-openai-compat
|
||||
- remote::groq-openai-compat
|
||||
- remote::sambanova-openai-compat
|
||||
- remote::cerebras-openai-compat
|
||||
- remote::sambanova
|
||||
- remote::passthrough
|
||||
- inline::sentence-transformers
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -113,6 +113,19 @@ from llama_stack.templates.template import (
|
|||
get_model_registry,
|
||||
)
|
||||
|
||||
REMOTE_INFERENCE_PROVIDERS_FOR_STARTER = {
|
||||
"anthropic",
|
||||
"cerebras",
|
||||
"fireworks",
|
||||
"gemini",
|
||||
"groq",
|
||||
"ollama",
|
||||
"openai",
|
||||
"sambanova",
|
||||
"together",
|
||||
"vllm",
|
||||
}
|
||||
|
||||
|
||||
def _get_model_entries_for_provider(provider_type: str) -> list[ProviderModelEntry]:
|
||||
"""Get model entries for a specific provider type."""
|
||||
|
@ -207,41 +220,27 @@ def get_remote_inference_providers() -> tuple[list[Provider], dict[str, list[Pro
|
|||
remote_providers = [
|
||||
provider
|
||||
for provider in all_providers
|
||||
# TODO: re-add once the Python 3.13 issue is fixed
|
||||
# discussion: https://github.com/meta-llama/llama-stack/pull/2327#discussion_r2156883828
|
||||
if hasattr(provider, "adapter") and provider.adapter.adapter_type != "watsonx"
|
||||
if hasattr(provider, "adapter") and provider.adapter.adapter_type in REMOTE_INFERENCE_PROVIDERS_FOR_STARTER
|
||||
]
|
||||
|
||||
providers = []
|
||||
inference_providers = []
|
||||
available_models = {}
|
||||
|
||||
for provider_spec in remote_providers:
|
||||
provider_type = provider_spec.adapter.adapter_type
|
||||
|
||||
# Build the environment variable name for enabling this provider
|
||||
env_var = f"ENABLE_{provider_type.upper().replace('-', '_').replace('::', '_')}"
|
||||
model_entries = _get_model_entries_for_provider(provider_type)
|
||||
config = _get_config_for_provider(provider_spec)
|
||||
providers.append(
|
||||
(
|
||||
f"${{env.{env_var}:=__disabled__}}",
|
||||
provider_type,
|
||||
model_entries,
|
||||
config,
|
||||
)
|
||||
)
|
||||
available_models[f"${{env.{env_var}:=__disabled__}}"] = model_entries
|
||||
|
||||
inference_providers = []
|
||||
for provider_id, provider_type, model_entries, config in providers:
|
||||
inference_providers.append(
|
||||
Provider(
|
||||
provider_id=provider_id,
|
||||
provider_id=provider_type,
|
||||
provider_type=f"remote::{provider_type}",
|
||||
config=config,
|
||||
)
|
||||
)
|
||||
available_models[provider_id] = model_entries
|
||||
available_models[provider_type] = model_entries
|
||||
|
||||
return inference_providers, available_models
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue