test: verification on provider's OAI endpoints (#1893)

# What does this PR do?

## Test Plan

export MODEL=accounts/fireworks/models/llama4-scout-instruct-basic; LLAMA_STACK_CONFIG=verification pytest -s -v tests/integration/inference --vision-model $MODEL --text-model $MODEL
2025-04-07 23:06:28 -07:00 · 2025-04-07 23:06:28 -07:00 · 7b4eb0967e
commit 7b4eb0967e
parent 530d4bdfe1
43 changed files with 1683 additions and 17 deletions
--- a/llama_stack/providers/remote/inference/cerebras_openai_compat/__init__.py
+++ b/llama_stack/providers/remote/inference/cerebras_openai_compat/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.inference import Inference
+
+from .config import CerebrasCompatConfig
+
+
+async def get_adapter_impl(config: CerebrasCompatConfig, _deps) -> Inference:
+    # import dynamically so the import is used only when it is needed
+    from .cerebras import CerebrasCompatInferenceAdapter
+
+    adapter = CerebrasCompatInferenceAdapter(config)
+    return adapter
--- a/llama_stack/providers/remote/inference/cerebras_openai_compat/cerebras.py
+++ b/llama_stack/providers/remote/inference/cerebras_openai_compat/cerebras.py
@@ -0,0 +1,30 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.remote.inference.cerebras_openai_compat.config import CerebrasCompatConfig
+from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+
+from ..cerebras.models import MODEL_ENTRIES
+
+
+class CerebrasCompatInferenceAdapter(LiteLLMOpenAIMixin):
+    _config: CerebrasCompatConfig
+
+    def __init__(self, config: CerebrasCompatConfig):
+        LiteLLMOpenAIMixin.__init__(
+            self,
+            model_entries=MODEL_ENTRIES,
+            api_key_from_config=config.api_key,
+            provider_data_api_key_field="cerebras_api_key",
+            openai_compat_api_base=config.openai_compat_api_base,
+        )
+        self.config = config
+
+    async def initialize(self):
+        await super().initialize()
+
+    async def shutdown(self):
+        await super().shutdown()
--- a/llama_stack/providers/remote/inference/cerebras_openai_compat/config.py
+++ b/llama_stack/providers/remote/inference/cerebras_openai_compat/config.py
@@ -0,0 +1,38 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Field
+
+from llama_stack.schema_utils import json_schema_type
+
+
+class CerebrasProviderDataValidator(BaseModel):
+    cerebras_api_key: Optional[str] = Field(
+        default=None,
+        description="API key for Cerebras models",
+    )
+
+
+@json_schema_type
+class CerebrasCompatConfig(BaseModel):
+    api_key: Optional[str] = Field(
+        default=None,
+        description="The Cerebras API key",
+    )
+
+    openai_compat_api_base: str = Field(
+        default="https://api.cerebras.ai/v1",
+        description="The URL for the Cerebras API server",
+    )
+
+    @classmethod
+    def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> Dict[str, Any]:
+        return {
+            "openai_compat_api_base": "https://api.cerebras.ai/v1",
+            "api_key": api_key,
+        }
--- a/llama_stack/providers/remote/inference/fireworks/models.py
+++ b/llama_stack/providers/remote/inference/fireworks/models.py
@@ -48,6 +48,14 @@ MODEL_ENTRIES = [
        "accounts/fireworks/models/llama-guard-3-11b-vision",
        CoreModelId.llama_guard_3_11b_vision.value,
    ),
+    build_hf_repo_model_entry(
+        "accounts/fireworks/models/llama4-scout-instruct-basic",
+        CoreModelId.llama4_scout_17b_16e_instruct.value,
+    ),
+    build_hf_repo_model_entry(
+        "accounts/fireworks/models/llama4-maverick-instruct-basic",
+        CoreModelId.llama4_maverick_17b_128e_instruct.value,
+    ),
    ProviderModelEntry(
        provider_model_id="nomic-ai/nomic-embed-text-v1.5",
        model_type=ModelType.embedding,
--- a/llama_stack/providers/remote/inference/fireworks_openai_compat/__init__.py
+++ b/llama_stack/providers/remote/inference/fireworks_openai_compat/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.inference import Inference
+
+from .config import FireworksCompatConfig
+
+
+async def get_adapter_impl(config: FireworksCompatConfig, _deps) -> Inference:
+    # import dynamically so the import is used only when it is needed
+    from .fireworks import FireworksCompatInferenceAdapter
+
+    adapter = FireworksCompatInferenceAdapter(config)
+    return adapter
--- a/llama_stack/providers/remote/inference/fireworks_openai_compat/config.py
+++ b/llama_stack/providers/remote/inference/fireworks_openai_compat/config.py
@@ -0,0 +1,38 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Field
+
+from llama_stack.schema_utils import json_schema_type
+
+
+class FireworksProviderDataValidator(BaseModel):
+    fireworks_api_key: Optional[str] = Field(
+        default=None,
+        description="API key for Fireworks models",
+    )
+
+
+@json_schema_type
+class FireworksCompatConfig(BaseModel):
+    api_key: Optional[str] = Field(
+        default=None,
+        description="The Fireworks API key",
+    )
+
+    openai_compat_api_base: str = Field(
+        default="https://api.fireworks.ai/inference/v1",
+        description="The URL for the Fireworks API server",
+    )
+
+    @classmethod
+    def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY}", **kwargs) -> Dict[str, Any]:
+        return {
+            "openai_compat_api_base": "https://api.fireworks.ai/inference/v1",
+            "api_key": api_key,
+        }
--- a/llama_stack/providers/remote/inference/fireworks_openai_compat/fireworks.py
+++ b/llama_stack/providers/remote/inference/fireworks_openai_compat/fireworks.py
@@ -0,0 +1,30 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.remote.inference.fireworks_openai_compat.config import FireworksCompatConfig
+from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+
+from ..fireworks.models import MODEL_ENTRIES
+
+
+class FireworksCompatInferenceAdapter(LiteLLMOpenAIMixin):
+    _config: FireworksCompatConfig
+
+    def __init__(self, config: FireworksCompatConfig):
+        LiteLLMOpenAIMixin.__init__(
+            self,
+            model_entries=MODEL_ENTRIES,
+            api_key_from_config=config.api_key,
+            provider_data_api_key_field="fireworks_api_key",
+            openai_compat_api_base=config.openai_compat_api_base,
+        )
+        self.config = config
+
+    async def initialize(self):
+        await super().initialize()
+
+    async def shutdown(self):
+        await super().shutdown()
--- a/llama_stack/providers/remote/inference/groq/models.py
+++ b/llama_stack/providers/remote/inference/groq/models.py
@@ -35,4 +35,12 @@ MODEL_ENTRIES = [
        "groq/llama-3.2-3b-preview",
        CoreModelId.llama3_2_3b_instruct.value,
    ),
+    build_hf_repo_model_entry(
+        "groq/llama-4-scout-17b-16e-instruct",
+        CoreModelId.llama4_scout_17b_16e_instruct.value,
+    ),
+    build_hf_repo_model_entry(
+        "groq/llama-4-maverick-17b-128e-instruct",
+        CoreModelId.llama4_maverick_17b_128e_instruct.value,
+    ),
 ]
--- a/llama_stack/providers/remote/inference/groq_openai_compat/__init__.py
+++ b/llama_stack/providers/remote/inference/groq_openai_compat/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.inference import Inference
+
+from .config import GroqCompatConfig
+
+
+async def get_adapter_impl(config: GroqCompatConfig, _deps) -> Inference:
+    # import dynamically so the import is used only when it is needed
+    from .groq import GroqCompatInferenceAdapter
+
+    adapter = GroqCompatInferenceAdapter(config)
+    return adapter
--- a/llama_stack/providers/remote/inference/groq_openai_compat/config.py
+++ b/llama_stack/providers/remote/inference/groq_openai_compat/config.py
@@ -0,0 +1,38 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Field
+
+from llama_stack.schema_utils import json_schema_type
+
+
+class GroqProviderDataValidator(BaseModel):
+    groq_api_key: Optional[str] = Field(
+        default=None,
+        description="API key for Groq models",
+    )
+
+
+@json_schema_type
+class GroqCompatConfig(BaseModel):
+    api_key: Optional[str] = Field(
+        default=None,
+        description="The Groq API key",
+    )
+
+    openai_compat_api_base: str = Field(
+        default="https://api.groq.com/openai/v1",
+        description="The URL for the Groq API server",
+    )
+
+    @classmethod
+    def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY}", **kwargs) -> Dict[str, Any]:
+        return {
+            "openai_compat_api_base": "https://api.groq.com/openai/v1",
+            "api_key": api_key,
+        }
--- a/llama_stack/providers/remote/inference/groq_openai_compat/groq.py
+++ b/llama_stack/providers/remote/inference/groq_openai_compat/groq.py
@@ -0,0 +1,30 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.remote.inference.groq_openai_compat.config import GroqCompatConfig
+from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+
+from ..groq.models import MODEL_ENTRIES
+
+
+class GroqCompatInferenceAdapter(LiteLLMOpenAIMixin):
+    _config: GroqCompatConfig
+
+    def __init__(self, config: GroqCompatConfig):
+        LiteLLMOpenAIMixin.__init__(
+            self,
+            model_entries=MODEL_ENTRIES,
+            api_key_from_config=config.api_key,
+            provider_data_api_key_field="groq_api_key",
+            openai_compat_api_base=config.openai_compat_api_base,
+        )
+        self.config = config
+
+    async def initialize(self):
+        await super().initialize()
+
+    async def shutdown(self):
+        await super().shutdown()
--- a/llama_stack/providers/remote/inference/sambanova/models.py
+++ b/llama_stack/providers/remote/inference/sambanova/models.py
@@ -46,4 +46,8 @@ MODEL_ENTRIES = [
        "Meta-Llama-Guard-3-8B",
        CoreModelId.llama_guard_3_8b.value,
    ),
+    build_hf_repo_model_entry(
+        "Llama-4-Scout-17B-16E-Instruct",
+        CoreModelId.llama4_scout_17b_16e_instruct.value,
+    ),
 ]
--- a/llama_stack/providers/remote/inference/sambanova_openai_compat/__init__.py
+++ b/llama_stack/providers/remote/inference/sambanova_openai_compat/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.inference import Inference
+
+from .config import SambaNovaCompatConfig
+
+
+async def get_adapter_impl(config: SambaNovaCompatConfig, _deps) -> Inference:
+    # import dynamically so the import is used only when it is needed
+    from .sambanova import SambaNovaCompatInferenceAdapter
+
+    adapter = SambaNovaCompatInferenceAdapter(config)
+    return adapter
--- a/llama_stack/providers/remote/inference/sambanova_openai_compat/config.py
+++ b/llama_stack/providers/remote/inference/sambanova_openai_compat/config.py
@@ -0,0 +1,38 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Field
+
+from llama_stack.schema_utils import json_schema_type
+
+
+class SambaNovaProviderDataValidator(BaseModel):
+    sambanova_api_key: Optional[str] = Field(
+        default=None,
+        description="API key for SambaNova models",
+    )
+
+
+@json_schema_type
+class SambaNovaCompatConfig(BaseModel):
+    api_key: Optional[str] = Field(
+        default=None,
+        description="The SambaNova API key",
+    )
+
+    openai_compat_api_base: str = Field(
+        default="https://api.sambanova.ai/v1",
+        description="The URL for the SambaNova API server",
+    )
+
+    @classmethod
+    def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> Dict[str, Any]:
+        return {
+            "openai_compat_api_base": "https://api.sambanova.ai/v1",
+            "api_key": api_key,
+        }
--- a/llama_stack/providers/remote/inference/sambanova_openai_compat/sambanova.py
+++ b/llama_stack/providers/remote/inference/sambanova_openai_compat/sambanova.py
@@ -0,0 +1,30 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.remote.inference.sambanova_openai_compat.config import SambaNovaCompatConfig
+from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+
+from ..sambanova.models import MODEL_ENTRIES
+
+
+class SambaNovaCompatInferenceAdapter(LiteLLMOpenAIMixin):
+    _config: SambaNovaCompatConfig
+
+    def __init__(self, config: SambaNovaCompatConfig):
+        LiteLLMOpenAIMixin.__init__(
+            self,
+            model_entries=MODEL_ENTRIES,
+            api_key_from_config=config.api_key,
+            provider_data_api_key_field="sambanova_api_key",
+            openai_compat_api_base=config.openai_compat_api_base,
+        )
+        self.config = config
+
+    async def initialize(self):
+        await super().initialize()
+
+    async def shutdown(self):
+        await super().shutdown()
--- a/llama_stack/providers/remote/inference/together/models.py
+++ b/llama_stack/providers/remote/inference/together/models.py
@@ -64,4 +64,18 @@ MODEL_ENTRIES = [
            "context_length": 32768,
        },
    ),
+    build_hf_repo_model_entry(
+        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+        CoreModelId.llama4_scout_17b_16e_instruct.value,
+        additional_aliases=[
+            "together/meta-llama/Llama-4-Scout-17B-16E-Instruct",
+        ],
+    ),
+    build_hf_repo_model_entry(
+        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+        CoreModelId.llama4_maverick_17b_128e_instruct.value,
+        additional_aliases=[
+            "together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+        ],
+    ),
 ]
--- a/llama_stack/providers/remote/inference/together/together.py
+++ b/llama_stack/providers/remote/inference/together/together.py
@@ -118,7 +118,7 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi

    async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator:
        params = await self._get_params(request)
-        client = await self._get_client()
+        client = self._get_client()
        stream = await client.completions.create(**params)
        async for chunk in process_completion_stream_response(stream):
            yield chunk
--- a/llama_stack/providers/remote/inference/together_openai_compat/__init__.py
+++ b/llama_stack/providers/remote/inference/together_openai_compat/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.inference import Inference
+
+from .config import TogetherCompatConfig
+
+
+async def get_adapter_impl(config: TogetherCompatConfig, _deps) -> Inference:
+    # import dynamically so the import is used only when it is needed
+    from .together import TogetherCompatInferenceAdapter
+
+    adapter = TogetherCompatInferenceAdapter(config)
+    return adapter
--- a/llama_stack/providers/remote/inference/together_openai_compat/config.py
+++ b/llama_stack/providers/remote/inference/together_openai_compat/config.py
@@ -0,0 +1,38 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Field
+
+from llama_stack.schema_utils import json_schema_type
+
+
+class TogetherProviderDataValidator(BaseModel):
+    together_api_key: Optional[str] = Field(
+        default=None,
+        description="API key for Together models",
+    )
+
+
+@json_schema_type
+class TogetherCompatConfig(BaseModel):
+    api_key: Optional[str] = Field(
+        default=None,
+        description="The Together API key",
+    )
+
+    openai_compat_api_base: str = Field(
+        default="https://api.together.xyz/v1",
+        description="The URL for the Together API server",
+    )
+
+    @classmethod
+    def sample_run_config(cls, api_key: str = "${env.TOGETHER_API_KEY}", **kwargs) -> Dict[str, Any]:
+        return {
+            "openai_compat_api_base": "https://api.together.xyz/v1",
+            "api_key": api_key,
+        }
--- a/llama_stack/providers/remote/inference/together_openai_compat/together.py
+++ b/llama_stack/providers/remote/inference/together_openai_compat/together.py
@@ -0,0 +1,30 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.remote.inference.together_openai_compat.config import TogetherCompatConfig
+from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+
+from ..together.models import MODEL_ENTRIES
+
+
+class TogetherCompatInferenceAdapter(LiteLLMOpenAIMixin):
+    _config: TogetherCompatConfig
+
+    def __init__(self, config: TogetherCompatConfig):
+        LiteLLMOpenAIMixin.__init__(
+            self,
+            model_entries=MODEL_ENTRIES,
+            api_key_from_config=config.api_key,
+            provider_data_api_key_field="together_api_key",
+            openai_compat_api_base=config.openai_compat_api_base,
+        )
+        self.config = config
+
+    async def initialize(self):
+        await super().initialize()
+
+    async def shutdown(self):
+        await super().shutdown()