forked from phoenix-oss/llama-stack-mirror
Sambanova inference provider (#555)
# What does this PR do? This PR adds SambaNova as an inference provider - Add SambaNova as a provider ## Test Plan Test the functional command ``` pytest -s -v --providers inference=sambanova llama_stack/providers/tests/inference/test_embeddings.py llama_stack/providers/tests/inference/test_prompt_adapter.py llama_stack/providers/tests/inference/test_text_inference.py llama_stack/providers/tests/inference/test_vision_inference.py --env SAMBANOVA_API_KEY=<sambanova-api-key> ``` Test the distribution template: ``` # Docker LLAMA_STACK_PORT=5001 docker run -it -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ llamastack/distribution-sambanova \ --port $LLAMA_STACK_PORT \ --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY # Conda llama stack build --template sambanova --image-type conda llama stack run ./run.yaml \ --port $LLAMA_STACK_PORT \ --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY ``` ## Source [SambaNova API Documentation](https://cloud.sambanova.ai/apis) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [Y] Ran pre-commit to handle lint / formatting issues. - [Y] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section? - [Y] Updated relevant documentation. - [Y] Wrote necessary unit or integration tests. --------- Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
This commit is contained in:
parent
e2b5456e48
commit
22dc684da6
20 changed files with 870 additions and 2 deletions
|
@ -23,6 +23,7 @@ from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
|
|||
from llama_stack.providers.remote.inference.groq import GroqConfig
|
||||
from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
|
||||
from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
|
||||
from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
|
||||
from llama_stack.providers.remote.inference.tgi import TGIImplConfig
|
||||
from llama_stack.providers.remote.inference.together import TogetherImplConfig
|
||||
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
|
||||
|
@ -232,6 +233,23 @@ def inference_tgi() -> ProviderFixture:
|
|||
|
||||
|
||||
@pytest.fixture(scope="session")
def inference_sambanova() -> ProviderFixture:
    """Session-scoped fixture wiring up the remote SambaNova inference provider.

    Requires the SAMBANOVA_API_KEY environment variable to be set; the fixture
    fails fast via get_env_or_fail when it is missing, so tests error out with a
    clear message instead of a confusing auth failure later.
    """
    # Read the key once so the provider config and provider_data can never
    # diverge (the original read the environment twice).
    api_key = get_env_or_fail("SAMBANOVA_API_KEY")
    return ProviderFixture(
        providers=[
            Provider(
                provider_id="sambanova",
                provider_type="remote::sambanova",
                config=SambaNovaImplConfig(
                    api_key=api_key,
                ).model_dump(),
            )
        ],
        provider_data=dict(
            sambanova_api_key=api_key,
        ),
    )
|
||||
|
||||
|
||||
def inference_sentence_transformers() -> ProviderFixture:
|
||||
return ProviderFixture(
|
||||
providers=[
|
||||
|
@ -282,6 +300,7 @@ INFERENCE_FIXTURES = [
|
|||
"cerebras",
|
||||
"nvidia",
|
||||
"tgi",
|
||||
"sambanova",
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ class TestModelRegistration:
|
|||
},
|
||||
)
|
||||
|
||||
with pytest.raises(AssertionError) as exc_info:
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
await models_impl.register_model(
|
||||
model_id="custom-model-2",
|
||||
metadata={
|
||||
|
|
|
@ -385,6 +385,12 @@ class TestInference:
|
|||
# TODO(aidand): Remove this skip once Groq's tool calling for Llama3.2 works better
|
||||
pytest.skip("Groq's tool calling for Llama3.2 doesn't work very well")
|
||||
|
||||
if provider.__provider_spec__.provider_type == "remote::sambanova" and (
|
||||
"-1B-" in inference_model or "-3B-" in inference_model
|
||||
):
|
||||
# TODO(snova-edawrdm): Remove this skip once SambaNova's tool calling for 1B/ 3B
|
||||
pytest.skip("Sambanova's tool calling for lightweight models don't work")
|
||||
|
||||
messages = sample_messages + [
|
||||
UserMessage(
|
||||
content="What's the weather like in San Francisco?",
|
||||
|
@ -431,6 +437,9 @@ class TestInference:
|
|||
):
|
||||
# TODO(aidand): Remove this skip once Groq's tool calling for Llama3.2 works better
|
||||
pytest.skip("Groq's tool calling for Llama3.2 doesn't work very well")
|
||||
if provider.__provider_spec__.provider_type == "remote::sambanova":
|
||||
# TODO(snova-edawrdm): Remove this skip once SambaNova's tool calling under streaming is supported (we are working on it)
|
||||
pytest.skip("Sambanova's tool calling for streaming doesn't work")
|
||||
|
||||
messages = sample_messages + [
|
||||
UserMessage(
|
||||
|
|
|
@ -59,6 +59,7 @@ class TestVisionModelInference:
|
|||
"remote::fireworks",
|
||||
"remote::ollama",
|
||||
"remote::vllm",
|
||||
"remote::sambanova",
|
||||
):
|
||||
pytest.skip(
|
||||
"Other inference providers don't support vision chat completion() yet"
|
||||
|
@ -98,6 +99,7 @@ class TestVisionModelInference:
|
|||
"remote::fireworks",
|
||||
"remote::ollama",
|
||||
"remote::vllm",
|
||||
"remote::sambanova",
|
||||
):
|
||||
pytest.skip(
|
||||
"Other inference providers don't support vision chat completion() yet"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue