diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md index ee4bf0b25..ee9ddc818 100644 --- a/docs/source/distributions/self_hosted_distro/fireworks.md +++ b/docs/source/distributions/self_hosted_distro/fireworks.md @@ -46,6 +46,8 @@ The following models are available by default: - `accounts/fireworks/models/llama-v3p3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` - `accounts/fireworks/models/llama-guard-3-8b (aliases: meta-llama/Llama-Guard-3-8B)` - `accounts/fireworks/models/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)` +- `accounts/fireworks/models/llama4-scout-instruct-basic (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)` +- `accounts/fireworks/models/llama4-maverick-instruct-basic (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)` - `nomic-ai/nomic-embed-text-v1.5 ` diff --git a/docs/source/distributions/self_hosted_distro/groq.md b/docs/source/distributions/self_hosted_distro/groq.md index fe922f23d..4f5a8a859 100644 --- a/docs/source/distributions/self_hosted_distro/groq.md +++ b/docs/source/distributions/self_hosted_distro/groq.md @@ -42,6 +42,8 @@ The following models are available by default: - `groq/llama3-70b-8192 (aliases: meta-llama/Llama-3-70B-Instruct)` - `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)` - `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)` +- `groq/llama-4-scout-17b-16e-instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)` +- `groq/llama-4-maverick-17b-128e-instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)` ### Prerequisite: API Keys diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md index 1d2e0d9df..76b976d78 100644 --- a/docs/source/distributions/self_hosted_distro/sambanova.md +++ b/docs/source/distributions/self_hosted_distro/sambanova.md @@ -43,6 
+43,7 @@ The following models are available by default: - `Llama-3.2-11B-Vision-Instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` - `Llama-3.2-90B-Vision-Instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` - `Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)` +- `Llama-4-Scout-17B-16E-Instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)` ### Prerequisite: API Keys diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md index b07e85a1c..3ebb1f59e 100644 --- a/docs/source/distributions/self_hosted_distro/together.md +++ b/docs/source/distributions/self_hosted_distro/together.md @@ -48,6 +48,8 @@ The following models are available by default: - `meta-llama/Llama-Guard-3-11B-Vision-Turbo (aliases: meta-llama/Llama-Guard-3-11B-Vision)` - `togethercomputer/m2-bert-80M-8k-retrieval ` - `togethercomputer/m2-bert-80M-32k-retrieval ` +- `meta-llama/Llama-4-Scout-17B-16E-Instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct, together/meta-llama/Llama-4-Scout-17B-16E-Instruct)` +- `meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct, together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8)` ### Prerequisite: API Keys diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 5f9ae421f..aabb3bbdf 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -217,6 +217,56 @@ def available_providers() -> List[ProviderSpec]: provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator", ), ), + remote_provider_spec( + api=Api.inference, + adapter=AdapterSpec( + adapter_type="fireworks-openai-compat", + pip_packages=["litellm"], + module="llama_stack.providers.remote.inference.fireworks_openai_compat", + 
config_class="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksCompatConfig", + provider_data_validator="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksProviderDataValidator", + ), + ), + remote_provider_spec( + api=Api.inference, + adapter=AdapterSpec( + adapter_type="together-openai-compat", + pip_packages=["litellm"], + module="llama_stack.providers.remote.inference.together_openai_compat", + config_class="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherCompatConfig", + provider_data_validator="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherProviderDataValidator", + ), + ), + remote_provider_spec( + api=Api.inference, + adapter=AdapterSpec( + adapter_type="groq-openai-compat", + pip_packages=["litellm"], + module="llama_stack.providers.remote.inference.groq_openai_compat", + config_class="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqCompatConfig", + provider_data_validator="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqProviderDataValidator", + ), + ), + remote_provider_spec( + api=Api.inference, + adapter=AdapterSpec( + adapter_type="sambanova-openai-compat", + pip_packages=["litellm"], + module="llama_stack.providers.remote.inference.sambanova_openai_compat", + config_class="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaCompatConfig", + provider_data_validator="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaProviderDataValidator", + ), + ), + remote_provider_spec( + api=Api.inference, + adapter=AdapterSpec( + adapter_type="cerebras-openai-compat", + pip_packages=["litellm"], + module="llama_stack.providers.remote.inference.cerebras_openai_compat", + config_class="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasCompatConfig", + 
provider_data_validator="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasProviderDataValidator", + ), + ), remote_provider_spec( api=Api.inference, adapter=AdapterSpec( diff --git a/llama_stack/providers/remote/inference/cerebras_openai_compat/__init__.py b/llama_stack/providers/remote/inference/cerebras_openai_compat/__init__.py new file mode 100644 index 000000000..a5f07edd2 --- /dev/null +++ b/llama_stack/providers/remote/inference/cerebras_openai_compat/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.inference import Inference + +from .config import CerebrasCompatConfig + + +async def get_adapter_impl(config: CerebrasCompatConfig, _deps) -> Inference: + # import dynamically so the import is used only when it is needed + from .cerebras import CerebrasCompatInferenceAdapter + + adapter = CerebrasCompatInferenceAdapter(config) + return adapter diff --git a/llama_stack/providers/remote/inference/cerebras_openai_compat/cerebras.py b/llama_stack/providers/remote/inference/cerebras_openai_compat/cerebras.py new file mode 100644 index 000000000..b3f109dcc --- /dev/null +++ b/llama_stack/providers/remote/inference/cerebras_openai_compat/cerebras.py @@ -0,0 +1,30 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.providers.remote.inference.cerebras_openai_compat.config import CerebrasCompatConfig +from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin + +from ..cerebras.models import MODEL_ENTRIES + + +class CerebrasCompatInferenceAdapter(LiteLLMOpenAIMixin): + _config: CerebrasCompatConfig + + def __init__(self, config: CerebrasCompatConfig): + LiteLLMOpenAIMixin.__init__( + self, + model_entries=MODEL_ENTRIES, + api_key_from_config=config.api_key, + provider_data_api_key_field="cerebras_api_key", + openai_compat_api_base=config.openai_compat_api_base, + ) + self.config = config + + async def initialize(self): + await super().initialize() + + async def shutdown(self): + await super().shutdown() diff --git a/llama_stack/providers/remote/inference/cerebras_openai_compat/config.py b/llama_stack/providers/remote/inference/cerebras_openai_compat/config.py new file mode 100644 index 000000000..149c0a202 --- /dev/null +++ b/llama_stack/providers/remote/inference/cerebras_openai_compat/config.py @@ -0,0 +1,38 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import Any, Dict, Optional + +from pydantic import BaseModel, Field + +from llama_stack.schema_utils import json_schema_type + + +class CerebrasProviderDataValidator(BaseModel): + cerebras_api_key: Optional[str] = Field( + default=None, + description="API key for Cerebras models", + ) + + +@json_schema_type +class CerebrasCompatConfig(BaseModel): + api_key: Optional[str] = Field( + default=None, + description="The Cerebras API key", + ) + + openai_compat_api_base: str = Field( + default="https://api.cerebras.ai/v1", + description="The URL for the Cerebras API server", + ) + + @classmethod + def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> Dict[str, Any]: + return { + "openai_compat_api_base": "https://api.cerebras.ai/v1", + "api_key": api_key, + } diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py index 4975d061f..027eeab8d 100644 --- a/llama_stack/providers/remote/inference/fireworks/models.py +++ b/llama_stack/providers/remote/inference/fireworks/models.py @@ -48,6 +48,14 @@ MODEL_ENTRIES = [ "accounts/fireworks/models/llama-guard-3-11b-vision", CoreModelId.llama_guard_3_11b_vision.value, ), + build_hf_repo_model_entry( + "accounts/fireworks/models/llama4-scout-instruct-basic", + CoreModelId.llama4_scout_17b_16e_instruct.value, + ), + build_hf_repo_model_entry( + "accounts/fireworks/models/llama4-maverick-instruct-basic", + CoreModelId.llama4_maverick_17b_128e_instruct.value, + ), ProviderModelEntry( provider_model_id="nomic-ai/nomic-embed-text-v1.5", model_type=ModelType.embedding, diff --git a/llama_stack/providers/remote/inference/fireworks_openai_compat/__init__.py b/llama_stack/providers/remote/inference/fireworks_openai_compat/__init__.py new file mode 100644 index 000000000..f78f218b5 --- /dev/null +++ b/llama_stack/providers/remote/inference/fireworks_openai_compat/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, 
Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.inference import Inference + +from .config import FireworksCompatConfig + + +async def get_adapter_impl(config: FireworksCompatConfig, _deps) -> Inference: + # import dynamically so the import is used only when it is needed + from .fireworks import FireworksCompatInferenceAdapter + + adapter = FireworksCompatInferenceAdapter(config) + return adapter diff --git a/llama_stack/providers/remote/inference/fireworks_openai_compat/config.py b/llama_stack/providers/remote/inference/fireworks_openai_compat/config.py new file mode 100644 index 000000000..0263d348a --- /dev/null +++ b/llama_stack/providers/remote/inference/fireworks_openai_compat/config.py @@ -0,0 +1,38 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import Any, Dict, Optional + +from pydantic import BaseModel, Field + +from llama_stack.schema_utils import json_schema_type + + +class FireworksProviderDataValidator(BaseModel): + fireworks_api_key: Optional[str] = Field( + default=None, + description="API key for Fireworks models", + ) + + +@json_schema_type +class FireworksCompatConfig(BaseModel): + api_key: Optional[str] = Field( + default=None, + description="The Fireworks API key", + ) + + openai_compat_api_base: str = Field( + default="https://api.fireworks.ai/inference/v1", + description="The URL for the Fireworks API server", + ) + + @classmethod + def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY}", **kwargs) -> Dict[str, Any]: + return { + "openai_compat_api_base": "https://api.fireworks.ai/inference/v1", + "api_key": api_key, + } diff --git a/llama_stack/providers/remote/inference/fireworks_openai_compat/fireworks.py b/llama_stack/providers/remote/inference/fireworks_openai_compat/fireworks.py new file mode 100644 index 000000000..f6045e0eb --- /dev/null +++ b/llama_stack/providers/remote/inference/fireworks_openai_compat/fireworks.py @@ -0,0 +1,30 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.providers.remote.inference.fireworks_openai_compat.config import FireworksCompatConfig +from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin + +from ..fireworks.models import MODEL_ENTRIES + + +class FireworksCompatInferenceAdapter(LiteLLMOpenAIMixin): + _config: FireworksCompatConfig + + def __init__(self, config: FireworksCompatConfig): + LiteLLMOpenAIMixin.__init__( + self, + model_entries=MODEL_ENTRIES, + api_key_from_config=config.api_key, + provider_data_api_key_field="fireworks_api_key", + openai_compat_api_base=config.openai_compat_api_base, + ) + self.config = config + + async def initialize(self): + await super().initialize() + + async def shutdown(self): + await super().shutdown() diff --git a/llama_stack/providers/remote/inference/groq/models.py b/llama_stack/providers/remote/inference/groq/models.py index 08b9b4dc4..d0c10ca62 100644 --- a/llama_stack/providers/remote/inference/groq/models.py +++ b/llama_stack/providers/remote/inference/groq/models.py @@ -35,4 +35,12 @@ MODEL_ENTRIES = [ "groq/llama-3.2-3b-preview", CoreModelId.llama3_2_3b_instruct.value, ), + build_hf_repo_model_entry( + "groq/llama-4-scout-17b-16e-instruct", + CoreModelId.llama4_scout_17b_16e_instruct.value, + ), + build_hf_repo_model_entry( + "groq/llama-4-maverick-17b-128e-instruct", + CoreModelId.llama4_maverick_17b_128e_instruct.value, + ), ] diff --git a/llama_stack/providers/remote/inference/groq_openai_compat/__init__.py b/llama_stack/providers/remote/inference/groq_openai_compat/__init__.py new file mode 100644 index 000000000..8161df20d --- /dev/null +++ b/llama_stack/providers/remote/inference/groq_openai_compat/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.apis.inference import Inference + +from .config import GroqCompatConfig + + +async def get_adapter_impl(config: GroqCompatConfig, _deps) -> Inference: + # import dynamically so the import is used only when it is needed + from .groq import GroqCompatInferenceAdapter + + adapter = GroqCompatInferenceAdapter(config) + return adapter diff --git a/llama_stack/providers/remote/inference/groq_openai_compat/config.py b/llama_stack/providers/remote/inference/groq_openai_compat/config.py new file mode 100644 index 000000000..4b90b4576 --- /dev/null +++ b/llama_stack/providers/remote/inference/groq_openai_compat/config.py @@ -0,0 +1,38 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any, Dict, Optional + +from pydantic import BaseModel, Field + +from llama_stack.schema_utils import json_schema_type + + +class GroqProviderDataValidator(BaseModel): + groq_api_key: Optional[str] = Field( + default=None, + description="API key for Groq models", + ) + + +@json_schema_type +class GroqCompatConfig(BaseModel): + api_key: Optional[str] = Field( + default=None, + description="The Groq API key", + ) + + openai_compat_api_base: str = Field( + default="https://api.groq.com/openai/v1", + description="The URL for the Groq API server", + ) + + @classmethod + def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY}", **kwargs) -> Dict[str, Any]: + return { + "openai_compat_api_base": "https://api.groq.com/openai/v1", + "api_key": api_key, + } diff --git a/llama_stack/providers/remote/inference/groq_openai_compat/groq.py b/llama_stack/providers/remote/inference/groq_openai_compat/groq.py new file mode 100644 index 000000000..30e18cd06 --- /dev/null +++ b/llama_stack/providers/remote/inference/groq_openai_compat/groq.py @@ -0,0 +1,30 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.providers.remote.inference.groq_openai_compat.config import GroqCompatConfig +from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin + +from ..groq.models import MODEL_ENTRIES + + +class GroqCompatInferenceAdapter(LiteLLMOpenAIMixin): + _config: GroqCompatConfig + + def __init__(self, config: GroqCompatConfig): + LiteLLMOpenAIMixin.__init__( + self, + model_entries=MODEL_ENTRIES, + api_key_from_config=config.api_key, + provider_data_api_key_field="groq_api_key", + openai_compat_api_base=config.openai_compat_api_base, + ) + self.config = config + + async def initialize(self): + await super().initialize() + + async def shutdown(self): + await super().shutdown() diff --git a/llama_stack/providers/remote/inference/sambanova/models.py b/llama_stack/providers/remote/inference/sambanova/models.py index 9589ea268..43041e94a 100644 --- a/llama_stack/providers/remote/inference/sambanova/models.py +++ b/llama_stack/providers/remote/inference/sambanova/models.py @@ -46,4 +46,8 @@ MODEL_ENTRIES = [ "Meta-Llama-Guard-3-8B", CoreModelId.llama_guard_3_8b.value, ), + build_hf_repo_model_entry( + "Llama-4-Scout-17B-16E-Instruct", + CoreModelId.llama4_scout_17b_16e_instruct.value, + ), ] diff --git a/llama_stack/providers/remote/inference/sambanova_openai_compat/__init__.py b/llama_stack/providers/remote/inference/sambanova_openai_compat/__init__.py new file mode 100644 index 000000000..e31a3364c --- /dev/null +++ b/llama_stack/providers/remote/inference/sambanova_openai_compat/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.apis.inference import Inference + +from .config import SambaNovaCompatConfig + + +async def get_adapter_impl(config: SambaNovaCompatConfig, _deps) -> Inference: + # import dynamically so the import is used only when it is needed + from .sambanova import SambaNovaCompatInferenceAdapter + + adapter = SambaNovaCompatInferenceAdapter(config) + return adapter diff --git a/llama_stack/providers/remote/inference/sambanova_openai_compat/config.py b/llama_stack/providers/remote/inference/sambanova_openai_compat/config.py new file mode 100644 index 000000000..b792cb6e7 --- /dev/null +++ b/llama_stack/providers/remote/inference/sambanova_openai_compat/config.py @@ -0,0 +1,38 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any, Dict, Optional + +from pydantic import BaseModel, Field + +from llama_stack.schema_utils import json_schema_type + + +class SambaNovaProviderDataValidator(BaseModel): + sambanova_api_key: Optional[str] = Field( + default=None, + description="API key for SambaNova models", + ) + + +@json_schema_type +class SambaNovaCompatConfig(BaseModel): + api_key: Optional[str] = Field( + default=None, + description="The SambaNova API key", + ) + + openai_compat_api_base: str = Field( + default="https://api.sambanova.ai/v1", + description="The URL for the SambaNova API server", + ) + + @classmethod + def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> Dict[str, Any]: + return { + "openai_compat_api_base": "https://api.sambanova.ai/v1", + "api_key": api_key, + } diff --git a/llama_stack/providers/remote/inference/sambanova_openai_compat/sambanova.py b/llama_stack/providers/remote/inference/sambanova_openai_compat/sambanova.py new file mode 100644 index 000000000..aa59028b6 --- /dev/null +++ 
b/llama_stack/providers/remote/inference/sambanova_openai_compat/sambanova.py @@ -0,0 +1,30 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.providers.remote.inference.sambanova_openai_compat.config import SambaNovaCompatConfig +from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin + +from ..sambanova.models import MODEL_ENTRIES + + +class SambaNovaCompatInferenceAdapter(LiteLLMOpenAIMixin): + _config: SambaNovaCompatConfig + + def __init__(self, config: SambaNovaCompatConfig): + LiteLLMOpenAIMixin.__init__( + self, + model_entries=MODEL_ENTRIES, + api_key_from_config=config.api_key, + provider_data_api_key_field="sambanova_api_key", + openai_compat_api_base=config.openai_compat_api_base, + ) + self.config = config + + async def initialize(self): + await super().initialize() + + async def shutdown(self): + await super().shutdown() diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py index f014c03f0..f4b259767 100644 --- a/llama_stack/providers/remote/inference/together/models.py +++ b/llama_stack/providers/remote/inference/together/models.py @@ -64,4 +64,18 @@ MODEL_ENTRIES = [ "context_length": 32768, }, ), + build_hf_repo_model_entry( + "meta-llama/Llama-4-Scout-17B-16E-Instruct", + CoreModelId.llama4_scout_17b_16e_instruct.value, + additional_aliases=[ + "together/meta-llama/Llama-4-Scout-17B-16E-Instruct", + ], + ), + build_hf_repo_model_entry( + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + CoreModelId.llama4_maverick_17b_128e_instruct.value, + additional_aliases=[ + "together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + ], + ), ] diff --git a/llama_stack/providers/remote/inference/together/together.py 
b/llama_stack/providers/remote/inference/together/together.py index a4e02f2cb..df7610935 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -118,7 +118,7 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator: params = await self._get_params(request) - client = await self._get_client() + client = self._get_client() stream = await client.completions.create(**params) async for chunk in process_completion_stream_response(stream): yield chunk diff --git a/llama_stack/providers/remote/inference/together_openai_compat/__init__.py b/llama_stack/providers/remote/inference/together_openai_compat/__init__.py new file mode 100644 index 000000000..6fdf05b7e --- /dev/null +++ b/llama_stack/providers/remote/inference/together_openai_compat/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.inference import Inference + +from .config import TogetherCompatConfig + + +async def get_adapter_impl(config: TogetherCompatConfig, _deps) -> Inference: + # import dynamically so the import is used only when it is needed + from .together import TogetherCompatInferenceAdapter + + adapter = TogetherCompatInferenceAdapter(config) + return adapter diff --git a/llama_stack/providers/remote/inference/together_openai_compat/config.py b/llama_stack/providers/remote/inference/together_openai_compat/config.py new file mode 100644 index 000000000..120adbed9 --- /dev/null +++ b/llama_stack/providers/remote/inference/together_openai_compat/config.py @@ -0,0 +1,38 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any, Dict, Optional + +from pydantic import BaseModel, Field + +from llama_stack.schema_utils import json_schema_type + + +class TogetherProviderDataValidator(BaseModel): + together_api_key: Optional[str] = Field( + default=None, + description="API key for Together models", + ) + + +@json_schema_type +class TogetherCompatConfig(BaseModel): + api_key: Optional[str] = Field( + default=None, + description="The Together API key", + ) + + openai_compat_api_base: str = Field( + default="https://api.together.xyz/v1", + description="The URL for the Together API server", + ) + + @classmethod + def sample_run_config(cls, api_key: str = "${env.TOGETHER_API_KEY}", **kwargs) -> Dict[str, Any]: + return { + "openai_compat_api_base": "https://api.together.xyz/v1", + "api_key": api_key, + } diff --git a/llama_stack/providers/remote/inference/together_openai_compat/together.py b/llama_stack/providers/remote/inference/together_openai_compat/together.py new file mode 100644 index 000000000..b463f5c35 --- /dev/null +++ b/llama_stack/providers/remote/inference/together_openai_compat/together.py @@ -0,0 +1,30 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.providers.remote.inference.together_openai_compat.config import TogetherCompatConfig +from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin + +from ..together.models import MODEL_ENTRIES + + +class TogetherCompatInferenceAdapter(LiteLLMOpenAIMixin): + _config: TogetherCompatConfig + + def __init__(self, config: TogetherCompatConfig): + LiteLLMOpenAIMixin.__init__( + self, + model_entries=MODEL_ENTRIES, + api_key_from_config=config.api_key, + provider_data_api_key_field="together_api_key", + openai_compat_api_base=config.openai_compat_api_base, + ) + self.config = config + + async def initialize(self): + await super().initialize() + + async def shutdown(self): + await super().shutdown() diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index a11c734df..4d7063953 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -33,12 +33,17 @@ def get_huggingface_repo(model_descriptor: str) -> Optional[str]: return None -def build_hf_repo_model_entry(provider_model_id: str, model_descriptor: str) -> ProviderModelEntry: +def build_hf_repo_model_entry( + provider_model_id: str, model_descriptor: str, additional_aliases: Optional[List[str]] = None +) -> ProviderModelEntry: + aliases = [ + get_huggingface_repo(model_descriptor), + ] + if additional_aliases: + aliases.extend(additional_aliases) return ProviderModelEntry( provider_model_id=provider_model_id, - aliases=[ - get_huggingface_repo(model_descriptor), - ], + aliases=aliases, llama_model=model_descriptor, ) diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index 44a89dfb0..0f3945b34 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -642,6 +642,36 @@ 
PYTHON_TYPE_TO_LITELLM_TYPE = { } +def to_openai_param_type(param_type: str) -> dict: + """ + Convert Python type hints to OpenAI parameter type format. + + Examples: + 'str' -> {'type': 'string'} + 'int' -> {'type': 'integer'} + 'list[str]' -> {'type': 'array', 'items': {'type': 'string'}} + 'list[int]' -> {'type': 'array', 'items': {'type': 'integer'}} + """ + # Handle basic types first + basic_types = { + "str": "string", + "int": "integer", + "float": "number", + "bool": "boolean", + } + + if param_type in basic_types: + return {"type": basic_types[param_type]} + + # Handle list/array types + if param_type.startswith("list[") and param_type.endswith("]"): + inner_type = param_type[5:-1] + if inner_type in basic_types: + return {"type": "array", "items": {"type": basic_types.get(inner_type, inner_type)}} + + return {"type": param_type} + + def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict: """ Convert a ToolDefinition to an OpenAI API-compatible dictionary. @@ -702,7 +732,7 @@ def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict: properties = parameters["properties"] required = [] for param_name, param in tool.parameters.items(): - properties[param_name] = {"type": PYTHON_TYPE_TO_LITELLM_TYPE.get(param.param_type, param.param_type)} + properties[param_name] = to_openai_param_type(param.param_type) if param.description: properties[param_name].update(description=param.description) if param.default: diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index 04bbe212e..3c16dd5ea 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -193,6 +193,26 @@ models: provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-scout-instruct-basic + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + 
model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm - metadata: embedding_dimension: 768 context_length: 8192 diff --git a/llama_stack/templates/dependencies.json b/llama_stack/templates/dependencies.json index b8f475cea..053d6ef8a 100644 --- a/llama_stack/templates/dependencies.json +++ b/llama_stack/templates/dependencies.json @@ -682,6 +682,45 @@ "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" ], + "verification": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "fastapi", + "fire", + "httpx", + "langdetect", + "litellm", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlite-vec", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], "vllm-gpu": [ "aiosqlite", "autoevals", diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/dev/run.yaml index b4546ca58..ea3b7252a 100644 --- a/llama_stack/templates/dev/run.yaml +++ b/llama_stack/templates/dev/run.yaml @@ -251,6 +251,26 @@ models: provider_id: fireworks provider_model_id: 
accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-scout-instruct-basic + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm - metadata: embedding_dimension: 768 context_length: 8192 @@ -356,6 +376,26 @@ models: provider_id: groq provider_model_id: groq/llama-3.2-3b-preview model_type: llm +- metadata: {} + model_id: groq/llama-4-scout-17b-16e-instruct + provider_id: groq + provider_model_id: groq/llama-4-scout-17b-16e-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: groq + provider_model_id: groq/llama-4-scout-17b-16e-instruct + model_type: llm +- metadata: {} + model_id: groq/llama-4-maverick-17b-128e-instruct + provider_id: groq + provider_model_id: groq/llama-4-maverick-17b-128e-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: groq + provider_model_id: groq/llama-4-maverick-17b-128e-instruct + model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index 125c66177..aa6209db6 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ 
b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -205,6 +205,26 @@ models: provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-scout-instruct-basic + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm - metadata: embedding_dimension: 768 context_length: 8192 diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index 7b3c059e5..834ec8260 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -200,6 +200,26 @@ models: provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-scout-instruct-basic + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm +- 
metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm - metadata: embedding_dimension: 768 context_length: 8192 diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index 6c83ed43d..f557e64fd 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -148,6 +148,26 @@ models: provider_id: groq provider_model_id: groq/llama-3.2-3b-preview model_type: llm +- metadata: {} + model_id: groq/llama-4-scout-17b-16e-instruct + provider_id: groq + provider_model_id: groq/llama-4-scout-17b-16e-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: groq + provider_model_id: groq/llama-4-scout-17b-16e-instruct + model_type: llm +- metadata: {} + model_id: groq/llama-4-maverick-17b-128e-instruct + provider_id: groq + provider_model_id: groq/llama-4-maverick-17b-128e-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: groq + provider_model_id: groq/llama-4-maverick-17b-128e-instruct + model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index a64ada759..e4e8e4e21 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -165,6 +165,16 @@ models: provider_id: sambanova provider_model_id: Meta-Llama-Guard-3-8B model_type: llm +- metadata: {} + model_id: Llama-4-Scout-17B-16E-Instruct + provider_id: sambanova + provider_model_id: Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: sambanova + provider_model_id: Llama-4-Scout-17B-16E-Instruct + model_type: llm shields: - shield_id: meta-llama/Llama-Guard-3-8B 
vector_dbs: [] diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index 1fbf64e40..105ce896d 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -219,6 +219,36 @@ models: provider_id: together provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval model_type: embedding +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: together + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: together + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index d71aea640..1f1613655 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -214,6 +214,36 @@ models: provider_id: together provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval model_type: embedding +- metadata: {} + model_id: 
meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: together + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: together + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 diff --git a/llama_stack/templates/verification/__init__.py b/llama_stack/templates/verification/__init__.py new file mode 100644 index 000000000..5d8c281a6 --- /dev/null +++ b/llama_stack/templates/verification/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from .verification import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/verification/build.yaml b/llama_stack/templates/verification/build.yaml new file mode 100644 index 000000000..9f010d651 --- /dev/null +++ b/llama_stack/templates/verification/build.yaml @@ -0,0 +1,38 @@ +version: '2' +distribution_spec: + description: Distribution for running e2e tests in CI + providers: + inference: + - remote::openai + - remote::fireworks-openai-compat + - remote::together-openai-compat + - remote::groq-openai-compat + - remote::sambanova-openai-compat + - remote::cerebras-openai-compat + - inline::sentence-transformers + vector_io: + - inline::sqlite-vec + - remote::chromadb + - remote::pgvector + safety: + - inline::llama-guard + agents: + - inline::meta-reference + telemetry: + - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - remote::huggingface + - inline::localfs + scoring: + - inline::basic + - inline::llm-as-judge + - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::code-interpreter + - inline::rag-runtime + - remote::model-context-protocol +image_type: conda diff --git a/llama_stack/templates/verification/run.yaml b/llama_stack/templates/verification/run.yaml new file mode 100644 index 000000000..b6c2ca98d --- /dev/null +++ b/llama_stack/templates/verification/run.yaml @@ -0,0 +1,626 @@ +version: '2' +image_name: verification +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:} + - provider_id: fireworks-openai-compat + provider_type: remote::fireworks-openai-compat + config: + openai_compat_api_base: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY:} + - provider_id: together-openai-compat + provider_type: remote::together-openai-compat + config: + 
openai_compat_api_base: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:} + - provider_id: groq-openai-compat + provider_type: remote::groq-openai-compat + config: + openai_compat_api_base: https://api.groq.com/openai/v1 + api_key: ${env.GROQ_API_KEY:} + - provider_id: sambanova-openai-compat + provider_type: remote::sambanova-openai-compat + config: + openai_compat_api_base: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY:} + - provider_id: cerebras-openai-compat + provider_type: remote::cerebras-openai-compat + config: + openai_compat_api_base: https://api.cerebras.ai/v1 + api_key: ${env.CEREBRAS_API_KEY:} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:} + - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:localhost} + port: ${env.PGVECTOR_PORT:5432} + db: ${env.PGVECTOR_DB:} + user: ${env.PGVECTOR_USER:} + password: ${env.PGVECTOR_PASSWORD:} + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + sinks: ${env.TELEMETRY_SINKS:console,sqlite} + sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/verification/trace_store.db} + eval: + - provider_id: meta-reference + provider_type: 
inline::meta-reference + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/registry.db +models: +- metadata: {} + model_id: openai/gpt-4o + provider_id: openai + provider_model_id: openai/gpt-4o + model_type: llm +- metadata: {} + model_id: openai/gpt-4o-mini + provider_id: openai + provider_model_id: openai/gpt-4o-mini + model_type: llm +- metadata: {} + model_id: openai/chatgpt-4o-latest + provider_id: openai + provider_model_id: openai/chatgpt-4o-latest + model_type: llm +- metadata: + embedding_dimension: 1536 + 
context_length: 8192 + model_id: openai/text-embedding-3-small + provider_id: openai + provider_model_id: openai/text-embedding-3-small + model_type: embedding +- metadata: + embedding_dimension: 3072 + context_length: 8192 + model_id: openai/text-embedding-3-large + provider_id: openai + provider_model_id: openai/text-embedding-3-large + model_type: embedding +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-70B-Instruct + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm +- metadata: {} + model_id: 
accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-8B + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm +- metadata: 
{} + model_id: accounts/fireworks/models/llama4-scout-instruct-basic + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: fireworks-openai-compat + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm +- metadata: + embedding_dimension: 768 + context_length: 8192 + model_id: nomic-ai/nomic-embed-text-v1.5 + provider_id: fireworks-openai-compat + provider_model_id: nomic-ai/nomic-embed-text-v1.5 + model_type: embedding +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + provider_id: together-openai-compat + provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: together-openai-compat + provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + provider_id: together-openai-compat + provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-70B-Instruct + provider_id: together-openai-compat + provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + provider_id: together-openai-compat + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: 
llm +- metadata: {} + model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: together-openai-compat + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + provider_id: together-openai-compat + provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct + provider_id: together-openai-compat + provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + provider_id: together-openai-compat + provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: together-openai-compat + provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + provider_id: together-openai-compat + provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: together-openai-compat + provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + provider_id: together-openai-compat + provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: together-openai-compat + provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-Guard-3-8B + provider_id: together-openai-compat + provider_model_id: meta-llama/Meta-Llama-Guard-3-8B + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-8B + provider_id: together-openai-compat + provider_model_id: 
meta-llama/Meta-Llama-Guard-3-8B + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + provider_id: together-openai-compat + provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision + provider_id: together-openai-compat + provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + model_type: llm +- metadata: + embedding_dimension: 768 + context_length: 8192 + model_id: togethercomputer/m2-bert-80M-8k-retrieval + provider_id: together-openai-compat + provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval + model_type: embedding +- metadata: + embedding_dimension: 768 + context_length: 32768 + model_id: togethercomputer/m2-bert-80M-32k-retrieval + provider_id: together-openai-compat + provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval + model_type: embedding +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together-openai-compat + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together-openai-compat + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together-openai-compat + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: together-openai-compat + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: together-openai-compat + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: 
together-openai-compat + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: groq/llama3-8b-8192 + provider_id: groq-openai-compat + provider_model_id: groq/llama3-8b-8192 + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: groq-openai-compat + provider_model_id: groq/llama3-8b-8192 + model_type: llm +- metadata: {} + model_id: groq/llama-3.1-8b-instant + provider_id: groq-openai-compat + provider_model_id: groq/llama-3.1-8b-instant + model_type: llm +- metadata: {} + model_id: groq/llama3-70b-8192 + provider_id: groq-openai-compat + provider_model_id: groq/llama3-70b-8192 + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3-70B-Instruct + provider_id: groq-openai-compat + provider_model_id: groq/llama3-70b-8192 + model_type: llm +- metadata: {} + model_id: groq/llama-3.3-70b-versatile + provider_id: groq-openai-compat + provider_model_id: groq/llama-3.3-70b-versatile + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: groq-openai-compat + provider_model_id: groq/llama-3.3-70b-versatile + model_type: llm +- metadata: {} + model_id: groq/llama-3.2-3b-preview + provider_id: groq-openai-compat + provider_model_id: groq/llama-3.2-3b-preview + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct + provider_id: groq-openai-compat + provider_model_id: groq/llama-3.2-3b-preview + model_type: llm +- metadata: {} + model_id: groq/llama-4-scout-17b-16e-instruct + provider_id: groq-openai-compat + provider_model_id: groq/llama-4-scout-17b-16e-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: groq-openai-compat + provider_model_id: groq/llama-4-scout-17b-16e-instruct + model_type: llm +- metadata: {} + model_id: groq/llama-4-maverick-17b-128e-instruct + provider_id: groq-openai-compat + provider_model_id: 
groq/llama-4-maverick-17b-128e-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: groq-openai-compat + provider_model_id: groq/llama-4-maverick-17b-128e-instruct + model_type: llm +- metadata: {} + model_id: Meta-Llama-3.1-8B-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Meta-Llama-3.1-8B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Meta-Llama-3.1-8B-Instruct + model_type: llm +- metadata: {} + model_id: Meta-Llama-3.1-70B-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Meta-Llama-3.1-70B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-70B-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Meta-Llama-3.1-70B-Instruct + model_type: llm +- metadata: {} + model_id: Meta-Llama-3.1-405B-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Meta-Llama-3.1-405B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: sambanova-openai-compat + provider_model_id: Meta-Llama-3.1-405B-Instruct + model_type: llm +- metadata: {} + model_id: Meta-Llama-3.2-1B-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Meta-Llama-3.2-1B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-1B-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Meta-Llama-3.2-1B-Instruct + model_type: llm +- metadata: {} + model_id: Meta-Llama-3.2-3B-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Meta-Llama-3.2-3B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Meta-Llama-3.2-3B-Instruct + model_type: llm +- metadata: {} + model_id: Meta-Llama-3.3-70B-Instruct + provider_id: sambanova-openai-compat + 
provider_model_id: Meta-Llama-3.3-70B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Meta-Llama-3.3-70B-Instruct + model_type: llm +- metadata: {} + model_id: Llama-3.2-11B-Vision-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Llama-3.2-11B-Vision-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Llama-3.2-11B-Vision-Instruct + model_type: llm +- metadata: {} + model_id: Llama-3.2-90B-Vision-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Llama-3.2-90B-Vision-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Llama-3.2-90B-Vision-Instruct + model_type: llm +- metadata: {} + model_id: Meta-Llama-Guard-3-8B + provider_id: sambanova-openai-compat + provider_model_id: Meta-Llama-Guard-3-8B + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-8B + provider_id: sambanova-openai-compat + provider_model_id: Meta-Llama-Guard-3-8B + model_type: llm +- metadata: {} + model_id: Llama-4-Scout-17B-16E-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: sambanova-openai-compat + provider_model_id: Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: llama3.1-8b + provider_id: cerebras-openai-compat + provider_model_id: llama3.1-8b + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: cerebras-openai-compat + provider_model_id: llama3.1-8b + model_type: llm +- metadata: {} + model_id: llama-3.3-70b + provider_id: cerebras-openai-compat + provider_model_id: llama-3.3-70b + model_type: llm +- metadata: {} 
+ model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: cerebras-openai-compat + provider_model_id: llama-3.3-70b + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: meta-llama/Llama-Guard-3-8B +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter +server: + port: 8321 diff --git a/llama_stack/templates/verification/verification.py b/llama_stack/templates/verification/verification.py new file mode 100644 index 000000000..7ef8d6a96 --- /dev/null +++ b/llama_stack/templates/verification/verification.py @@ -0,0 +1,206 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
from typing import Dict, List, Tuple

from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import (
    ModelInput,
    Provider,
    ShieldInput,
    ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
    SentenceTransformersInferenceConfig,
)
from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
    SQLiteVectorIOConfig,
)
from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES as CEREBRAS_MODEL_ENTRIES
from llama_stack.providers.remote.inference.cerebras_openai_compat.config import CerebrasCompatConfig
from llama_stack.providers.remote.inference.fireworks.models import (
    MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.fireworks_openai_compat.config import FireworksCompatConfig
from llama_stack.providers.remote.inference.groq.models import (
    MODEL_ENTRIES as GROQ_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.groq_openai_compat.config import GroqCompatConfig
from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
from llama_stack.providers.remote.inference.openai.models import (
    MODEL_ENTRIES as OPENAI_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES as SAMBANOVA_MODEL_ENTRIES
from llama_stack.providers.remote.inference.sambanova_openai_compat.config import SambaNovaCompatConfig
from llama_stack.providers.remote.inference.together.models import (
    MODEL_ENTRIES as TOGETHER_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.together_openai_compat.config import TogetherCompatConfig
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
from llama_stack.providers.remote.vector_io.pgvector.config import (
    PGVectorVectorIOConfig,
)
from llama_stack.templates.template import (
    DistributionTemplate,
    RunConfigSettings,
    get_model_registry,
)


def get_inference_providers() -> Tuple[List[Provider], Dict[str, List]]:
    """Build the remote inference providers for the verification distribution.

    Returns:
        A tuple of:
          - the list of ``Provider`` entries, one per remote inference backend;
          - a mapping of ``provider_id`` -> model entries for that provider,
            in the shape expected by ``get_model_registry``.

    Note: the second element is a *dict* keyed by provider id, not a flat
    list of ``ModelInput`` (the previous ``List[ModelInput]`` annotation was
    incorrect).

    In this template every API key is optional: each config reads its key
    from an environment variable with an empty-string fallback
    (``${env.X:}``), so providers without credentials simply stay unused.
    """
    # (provider_id, model entries, sample run config) for each backend we verify.
    providers = [
        (
            "openai",
            OPENAI_MODEL_ENTRIES,
            OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:}"),
        ),
        (
            "fireworks-openai-compat",
            FIREWORKS_MODEL_ENTRIES,
            FireworksCompatConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"),
        ),
        (
            "together-openai-compat",
            TOGETHER_MODEL_ENTRIES,
            TogetherCompatConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"),
        ),
        (
            "groq-openai-compat",
            GROQ_MODEL_ENTRIES,
            GroqCompatConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"),
        ),
        (
            "sambanova-openai-compat",
            SAMBANOVA_MODEL_ENTRIES,
            SambaNovaCompatConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"),
        ),
        (
            "cerebras-openai-compat",
            CEREBRAS_MODEL_ENTRIES,
            CerebrasCompatConfig.sample_run_config(api_key="${env.CEREBRAS_API_KEY:}"),
        ),
    ]
    inference_providers = []
    available_models = {}
    for provider_id, model_entries, config in providers:
        inference_providers.append(
            Provider(
                provider_id=provider_id,
                # provider_type is derived from the id by convention
                # (e.g. "groq-openai-compat" -> "remote::groq-openai-compat").
                provider_type=f"remote::{provider_id}",
                config=config,
            )
        )
        available_models[provider_id] = model_entries
    return inference_providers, available_models


def get_distribution_template() -> DistributionTemplate:
    """Assemble the ``verification`` distribution template.

    This distro wires every OpenAI-compatible remote inference backend
    (plus a local sentence-transformers embedding provider) into a single
    run config, and is used for running e2e verification tests in CI.
    """
    inference_providers, available_models = get_inference_providers()
    providers = {
        "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
        "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
        "safety": ["inline::llama-guard"],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
        "eval": ["inline::meta-reference"],
        "datasetio": ["remote::huggingface", "inline::localfs"],
        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
    }
    name = "verification"

    # sqlite-vec is always on; chromadb/pgvector are enabled only when their
    # ENABLE_* env var is set (the "${env.X+value}" form expands to "value"
    # only if X is defined).
    vector_io_providers = [
        Provider(
            provider_id="sqlite-vec",
            provider_type="inline::sqlite-vec",
            config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
        ),
        Provider(
            provider_id="${env.ENABLE_CHROMADB+chromadb}",
            provider_type="remote::chromadb",
            config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"),
        ),
        Provider(
            provider_id="${env.ENABLE_PGVECTOR+pgvector}",
            provider_type="remote::pgvector",
            config=PGVectorVectorIOConfig.sample_run_config(
                db="${env.PGVECTOR_DB:}",
                user="${env.PGVECTOR_USER:}",
                password="${env.PGVECTOR_PASSWORD:}",
            ),
        ),
    ]
    embedding_provider = Provider(
        provider_id="sentence-transformers",
        provider_type="inline::sentence-transformers",
        config=SentenceTransformersInferenceConfig.sample_run_config(),
    )

    default_tool_groups = [
        ToolGroupInput(
            toolgroup_id="builtin::websearch",
            provider_id="tavily-search",
        ),
        ToolGroupInput(
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
        ToolGroupInput(
            toolgroup_id="builtin::code_interpreter",
            provider_id="code-interpreter",
        ),
    ]
    embedding_model = ModelInput(
        model_id="all-MiniLM-L6-v2",
        provider_id=embedding_provider.provider_id,
        model_type=ModelType.embedding,
        metadata={
            # all-MiniLM-L6-v2 produces 384-dimensional embeddings.
            "embedding_dimension": 384,
        },
    )

    default_models = get_model_registry(available_models)
    return DistributionTemplate(
        name=name,
        distro_type="self_hosted",
        description="Distribution for running e2e tests in CI",
        container_image=None,
        template_path=None,
        providers=providers,
        available_models_by_provider=available_models,
        run_configs={
            "run.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": inference_providers + [embedding_provider],
                    "vector_io": vector_io_providers,
                },
                default_models=default_models + [embedding_model],
                default_tool_groups=default_tool_groups,
                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
            ),
        },
        # Document every env var this template reads; previously only
        # FIREWORKS/OPENAI were listed even though get_inference_providers()
        # also reads the Together/Groq/SambaNova/Cerebras keys.
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "FIREWORKS_API_KEY": (
                "",
                "Fireworks API Key",
            ),
            "OPENAI_API_KEY": (
                "",
                "OpenAI API Key",
            ),
            "TOGETHER_API_KEY": (
                "",
                "Together API Key",
            ),
            "GROQ_API_KEY": (
                "",
                "Groq API Key",
            ),
            "SAMBANOVA_API_KEY": (
                "",
                "SambaNova API Key",
            ),
            "CEREBRAS_API_KEY": (
                "",
                "Cerebras API Key",
            ),
        },
    )
messages.append(op_msg.model_dump()) - # pprint(op_msg) + # print(op_msg) assert op_msg.role == "assistant" expected = tc["expected"].pop(0) @@ -568,3 +573,6 @@ def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, t actual_answer = op_msg.content.lower() # pprint(actual_answer) assert expected["answer"] in actual_answer + + # sleep to avoid rate limit + sleep(1) diff --git a/tests/integration/inference/test_vision_inference.py b/tests/integration/inference/test_vision_inference.py index a4fd5bcc6..d47dd3d64 100644 --- a/tests/integration/inference/test_vision_inference.py +++ b/tests/integration/inference/test_vision_inference.py @@ -76,8 +76,9 @@ def multi_image_data(): @pytest.mark.parametrize("stream", [True, False]) def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_data, stream): - if "llama-4" not in vision_model_id.lower() and "gpt-4o" not in vision_model_id.lower(): - pytest.skip("Skip for non-llama4, gpt4o models") + supported_models = ["llama-4", "gpt-4o", "llama4"] + if not any(model in vision_model_id.lower() for model in supported_models): + pytest.skip(f"Skip for non-supported model: {vision_model_id}") messages = [ { diff --git a/tests/integration/test_cases/inference/chat_completion.json b/tests/integration/test_cases/inference/chat_completion.json index c84d29e64..01956bd59 100644 --- a/tests/integration/test_cases/inference/chat_completion.json +++ b/tests/integration/test_cases/inference/chat_completion.json @@ -220,7 +220,7 @@ "description": "Availability status of the product." }, "tags": { - "param_type": "list", + "param_type": "list[str]", "description": "List of product tags" } } @@ -294,7 +294,7 @@ "description": "Location of the event" }, "participants": { - "param_type": "list", + "param_type": "list[str]", "description": "List of participant names" } }