forked from phoenix-oss/llama-stack-mirror
Sambanova inference provider (#555)
# What does this PR do? This PR adds SambaNova as an inference provider - Add SambaNova as a provider ## Test Plan Test the functional command ``` pytest -s -v --providers inference=sambanova llama_stack/providers/tests/inference/test_embeddings.py llama_stack/providers/tests/inference/test_prompt_adapter.py llama_stack/providers/tests/inference/test_text_inference.py llama_stack/providers/tests/inference/test_vision_inference.py --env SAMBANOVA_API_KEY=<sambanova-api-key> ``` Test the distribution template: ``` # Docker LLAMA_STACK_PORT=5001 docker run -it -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ llamastack/distribution-sambanova \ --port $LLAMA_STACK_PORT \ --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY # Conda llama stack build --template sambanova --image-type conda llama stack run ./run.yaml \ --port $LLAMA_STACK_PORT \ --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY ``` ## Source [SambaNova API Documentation](https://cloud.sambanova.ai/apis) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [Y] Ran pre-commit to handle lint / formatting issues. - [Y] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section? - [Y] Updated relevant documentation. - [Y] Wrote necessary unit or integration tests. --------- Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
This commit is contained in:
parent
e2b5456e48
commit
22dc684da6
20 changed files with 870 additions and 2 deletions
|
@ -23,6 +23,7 @@ from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
|
|||
from llama_stack.providers.remote.inference.groq import GroqConfig
|
||||
from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
|
||||
from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
|
||||
from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
|
||||
from llama_stack.providers.remote.inference.tgi import TGIImplConfig
|
||||
from llama_stack.providers.remote.inference.together import TogetherImplConfig
|
||||
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
|
||||
|
@ -232,6 +233,23 @@ def inference_tgi() -> ProviderFixture:
|
|||
|
||||
|
||||
@pytest.fixture(scope="session")
def inference_sambanova() -> ProviderFixture:
    """Session-scoped fixture wiring up the remote SambaNova inference provider.

    Requires the SAMBANOVA_API_KEY environment variable to be set; the fixture
    fails fast via get_env_or_fail when it is missing, so tests error out with a
    clear message instead of a confusing auth failure later.
    """
    # Read the key once so the provider config and provider_data can never
    # diverge (the original read the environment twice).
    api_key = get_env_or_fail("SAMBANOVA_API_KEY")
    return ProviderFixture(
        providers=[
            Provider(
                provider_id="sambanova",
                provider_type="remote::sambanova",
                config=SambaNovaImplConfig(
                    api_key=api_key,
                ).model_dump(),
            )
        ],
        provider_data=dict(
            sambanova_api_key=api_key,
        ),
    )
|
||||
|
||||
|
||||
def inference_sentence_transformers() -> ProviderFixture:
|
||||
return ProviderFixture(
|
||||
providers=[
|
||||
|
@ -282,6 +300,7 @@ INFERENCE_FIXTURES = [
|
|||
"cerebras",
|
||||
"nvidia",
|
||||
"tgi",
|
||||
"sambanova",
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ class TestModelRegistration:
|
|||
},
|
||||
)
|
||||
|
||||
with pytest.raises(AssertionError) as exc_info:
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
await models_impl.register_model(
|
||||
model_id="custom-model-2",
|
||||
metadata={
|
||||
|
|
|
@ -385,6 +385,12 @@ class TestInference:
|
|||
# TODO(aidand): Remove this skip once Groq's tool calling for Llama3.2 works better
|
||||
pytest.skip("Groq's tool calling for Llama3.2 doesn't work very well")
|
||||
|
||||
if provider.__provider_spec__.provider_type == "remote::sambanova" and (
|
||||
"-1B-" in inference_model or "-3B-" in inference_model
|
||||
):
|
||||
# TODO(snova-edawrdm): Remove this skip once SambaNova's tool calling for 1B/ 3B
|
||||
pytest.skip("Sambanova's tool calling for lightweight models don't work")
|
||||
|
||||
messages = sample_messages + [
|
||||
UserMessage(
|
||||
content="What's the weather like in San Francisco?",
|
||||
|
@ -431,6 +437,9 @@ class TestInference:
|
|||
):
|
||||
# TODO(aidand): Remove this skip once Groq's tool calling for Llama3.2 works better
|
||||
pytest.skip("Groq's tool calling for Llama3.2 doesn't work very well")
|
||||
if provider.__provider_spec__.provider_type == "remote::sambanova":
|
||||
# TODO(snova-edawrdm): Remove this skip once SambaNova's tool calling under streaming is supported (we are working on it)
|
||||
pytest.skip("Sambanova's tool calling for streaming doesn't work")
|
||||
|
||||
messages = sample_messages + [
|
||||
UserMessage(
|
||||
|
|
|
@ -59,6 +59,7 @@ class TestVisionModelInference:
|
|||
"remote::fireworks",
|
||||
"remote::ollama",
|
||||
"remote::vllm",
|
||||
"remote::sambanova",
|
||||
):
|
||||
pytest.skip(
|
||||
"Other inference providers don't support vision chat completion() yet"
|
||||
|
@ -98,6 +99,7 @@ class TestVisionModelInference:
|
|||
"remote::fireworks",
|
||||
"remote::ollama",
|
||||
"remote::vllm",
|
||||
"remote::sambanova",
|
||||
):
|
||||
pytest.skip(
|
||||
"Other inference providers don't support vision chat completion() yet"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue