feat(providers): support non-llama models for inference providers (#1200)

This PR begins the process of supporting non-llama models within Llama
Stack. We start simple by adding support for this functionality within a
few existing providers: fireworks, together and ollama.

## Test Plan

```bash
LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/client-sdk/inference/test_text_inference.py \
  --inference-model accounts/fireworks/models/phi-3-vision-128k-instruct
```

^ this passes most of the tests but as expected fails the tool calling
related tests since they are very specific to Llama models

```
inference/test_text_inference.py::test_text_completion_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct] PASSED
inference/test_text_inference.py::test_completion_log_probs_non_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct] PASSED
inference/test_text_inference.py::test_completion_log_probs_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct] PASSED
inference/test_text_inference.py::test_text_completion_structured_output[accounts/fireworks/models/phi-3-vision-128k-instruct-completion-01] PASSED
inference/test_text_inference.py::test_text_chat_completion_non_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct-Which planet do humans live on?-Earth] PASSED
inference/test_text_inference.py::test_text_chat_completion_non_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct-Which planet has rings around it with a name starting w
ith letter S?-Saturn] PASSED
inference/test_text_inference.py::test_text_chat_completion_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct-What's the name of the Sun in latin?-Sol] PASSED
inference/test_text_inference.py::test_text_chat_completion_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct-What is the name of the US captial?-Washington] PASSED
inference/test_text_inference.py::test_text_chat_completion_with_tool_calling_and_non_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct] FAILED
inference/test_text_inference.py::test_text_chat_completion_with_tool_calling_and_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct] FAILED
inference/test_text_inference.py::test_text_chat_completion_with_tool_choice_required[accounts/fireworks/models/phi-3-vision-128k-instruct] FAILED
inference/test_text_inference.py::test_text_chat_completion_with_tool_choice_none[accounts/fireworks/models/phi-3-vision-128k-instruct] PASSED
inference/test_text_inference.py::test_text_chat_completion_structured_output[accounts/fireworks/models/phi-3-vision-128k-instruct] ERROR
inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[accounts/fireworks/models/phi-3-vision-128k-instruct-True] PASSED
inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[accounts/fireworks/models/phi-3-vision-128k-instruct-False] PASSED
```
This commit is contained in:
Ashwin Bharambe 2025-02-21 13:21:28 -08:00 committed by GitHub
parent 9bbe34694d
commit ab54b8cd58
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 103 additions and 74 deletions

View file

@ -42,28 +42,30 @@ def pytest_addoption(parser):
)
parser.addoption(
"--inference-model",
action="store",
default=TEXT_MODEL,
help="Specify the inference model to use for testing",
)
parser.addoption(
"--vision-inference-model",
action="store",
default=VISION_MODEL,
help="Specify the vision inference model to use for testing",
)
parser.addoption(
"--safety-shield",
action="store",
default="meta-llama/Llama-Guard-3-1B",
help="Specify the safety shield model to use for testing",
)
parser.addoption(
"--embedding-model",
action="store",
default=TEXT_MODEL,
default=None,
help="Specify the embedding model to use for testing",
)
parser.addoption(
"--embedding-dimension",
type=int,
default=384,
help="Output dimensionality of the embedding model to use for testing",
)
@pytest.fixture(scope="session")
@ -78,7 +80,7 @@ def provider_data():
@pytest.fixture(scope="session")
def llama_stack_client(provider_data):
def llama_stack_client(provider_data, text_model_id):
if os.environ.get("LLAMA_STACK_CONFIG"):
client = LlamaStackAsLibraryClient(
get_env_or_fail("LLAMA_STACK_CONFIG"),
@ -95,6 +97,45 @@ def llama_stack_client(provider_data):
)
else:
raise ValueError("LLAMA_STACK_CONFIG or LLAMA_STACK_BASE_URL must be set")
return client
@pytest.fixture(scope="session")
def inference_provider_type(llama_stack_client):
providers = llama_stack_client.providers.list()
inference_providers = [p for p in providers if p.api == "inference"]
assert len(inference_providers) > 0, "No inference providers found"
return inference_providers[0].provider_type
@pytest.fixture(scope="session")
def client_with_models(llama_stack_client, text_model_id, vision_model_id, embedding_model_id, embedding_dimension):
client = llama_stack_client
providers = [p for p in client.providers.list() if p.api == "inference"]
assert len(providers) > 0, "No inference providers found"
inference_providers = [p.provider_id for p in providers if p.provider_type != "inline::sentence-transformers"]
if text_model_id:
client.models.register(model_id=text_model_id, provider_id=inference_providers[0])
if vision_model_id:
client.models.register(model_id=vision_model_id, provider_id=inference_providers[0])
if embedding_model_id and embedding_dimension:
# try to find a provider that supports embeddings, if sentence-transformers is not available
selected_provider = None
for p in providers:
if p.provider_type == "inline::sentence-transformers":
selected_provider = p
break
selected_provider = selected_provider or providers[0]
client.models.register(
model_id=embedding_model_id,
provider_id=selected_provider.provider_id,
model_type="embedding",
metadata={"embedding_dimension": embedding_dimension},
)
return client
@ -117,3 +158,9 @@ def pytest_generate_tests(metafunc):
[metafunc.config.getoption("--embedding-model")],
scope="session",
)
if "embedding_dimension" in metafunc.fixturenames:
metafunc.parametrize(
"embedding_dimension",
[metafunc.config.getoption("--embedding-dimension")],
scope="session",
)