Kill "remote" providers and fix testing with a remote stack properly (#435)

# What does this PR do?

This PR kills the notion of "pure passthrough" remote providers. You can no longer specify a single provider as remote; you must specify a whole distribution (stack) as remote.
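
For context, "remote" now shows up in the tests as just another fixture choice that stands in for an entire stack. A minimal sketch of that pattern, assuming the `remote_stack_fixture` and `ProviderFixture` helpers imported in the diffs below (the exact wiring here is an assumption, not the PR's literal code):

```python
import pytest

# Hedged sketch (not the PR's literal code): instead of a passthrough provider
# per API, one fixture stands in for the entire remote distribution (stack).
# remote_stack_fixture and ProviderFixture appear in the diffs below; how they
# are combined here is an assumption.
from ..conftest import ProviderFixture, remote_stack_fixture


@pytest.fixture(scope="session")
def inference_remote() -> ProviderFixture:
    # The whole stack is remote; the fixture only needs the server's base URL,
    # typically taken from REMOTE_STACK_URL (see the pytest invocation below).
    return remote_stack_fixture()
```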

This PR also significantly fixes / upgrades the testing infrastructure so you can now test against a remotely hosted stack server by just running:

```bash
pytest -s -v -m remote  test_agents.py \
  --inference-model=Llama3.1-8B-Instruct --safety-shield=Llama-Guard-3-1B \
  --env REMOTE_STACK_URL=http://localhost:5001
```
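
Here `-m remote` selects the remote-stack parametrization of the test fixtures, and `--env REMOTE_STACK_URL=...` points them at the already-running stack server; the server itself has to be launched separately (e.g. with `llama stack run`) before invoking pytest.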

Also fixed `test_agents_persistence.py` (which was broken) and killed
some deprecated testing functions.

## Test Plan

All the tests.
Commit 12947ac19e (parent 59a65e34d3), authored by Ashwin Bharambe on 2024-11-12 21:51:29 -08:00 and committed by GitHub.
28 changed files with 406 additions and 519 deletions

In the inference test fixtures, the deprecated `resolve_impls_for_test_v2` helper gives way to `construct_stack_for_test`, which builds the whole test stack (including a fully remote one):

```diff
@@ -21,7 +21,7 @@ from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
 from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
-from llama_stack.providers.tests.resolver import resolve_impls_for_test_v2
+from llama_stack.providers.tests.resolver import construct_stack_for_test
 from ..conftest import ProviderFixture, remote_stack_fixture
 from ..env import get_env_or_fail
@@ -182,15 +182,11 @@ INFERENCE_FIXTURES = [
 async def inference_stack(request, inference_model):
     fixture_name = request.param
     inference_fixture = request.getfixturevalue(f"inference_{fixture_name}")
-    impls = await resolve_impls_for_test_v2(
+    test_stack = await construct_stack_for_test(
         [Api.inference],
         {"inference": inference_fixture.providers},
         inference_fixture.provider_data,
-        models=[
-            ModelInput(
-                model_id=inference_model,
-            )
-        ],
+        models=[ModelInput(model_id=inference_model)],
     )
-    return (impls[Api.inference], impls[Api.models])
+    return test_stack.impls[Api.inference], test_stack.impls[Api.models]
```
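
To make the new helper's shape concrete, here is a hedged reading of the fixture above: `construct_stack_for_test` resolves the requested APIs and hands back a test stack whose `impls` dictionary is keyed by `Api`. The sketch only restates what the diff shows; the comments describing each argument are inferences, not documented parameter names.

```python
# Hedged sketch of how the fixture consumes construct_stack_for_test; the
# argument descriptions are inferences from how the fixtures use them.
test_stack = await construct_stack_for_test(
    [Api.inference],                                # APIs the test needs resolved
    {"inference": inference_fixture.providers},     # provider config per API
    inference_fixture.provider_data,                # extra per-provider data (keys, URLs)
    models=[ModelInput(model_id=inference_model)],  # models to register before the test
)

inference_impl = test_stack.impls[Api.inference]    # inference implementation under test
models_impl = test_stack.impls[Api.models]          # model registry implementation
```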

In the text inference tests, `completion` now takes the registered model via `model_id`:

```diff
@@ -147,9 +147,9 @@ class TestInference:
         user_input = "Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003."
         response = await inference_impl.completion(
+            model_id=inference_model,
             content=user_input,
             stream=False,
-            model=inference_model,
             sampling_params=SamplingParams(
                 max_tokens=50,
             ),
```

The same `model` → `model_id` rename in the vision inference tests:

```diff
@@ -55,7 +55,7 @@ class TestVisionModelInference:
         )
         response = await inference_impl.chat_completion(
-            model=inference_model,
+            model_id=inference_model,
             messages=[
                 UserMessage(content="You are a helpful assistant."),
                 UserMessage(content=[image, "Describe this image in two sentences."]),
@@ -102,7 +102,7 @@ class TestVisionModelInference:
         response = [
             r
             async for r in await inference_impl.chat_completion(
-                model=inference_model,
+                model_id=inference_model,
                 messages=[
                     UserMessage(
```
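
Across the test suite, inference calls now pass the registered model identifier as `model_id` rather than `model`. A short hedged sketch of the new call shape; only the keyword rename is taken from the diffs above, the message content and `stream=False` are illustrative:

```python
# Hedged sketch of the renamed keyword argument; only model_id (vs. the old
# model) is taken from the diffs above, the rest is illustrative.
response = await inference_impl.chat_completion(
    model_id=inference_model,  # registered model identifier, e.g. "Llama3.1-8B-Instruct"
    messages=[
        UserMessage(content="You are a helpful assistant."),
        UserMessage(content="In which year was Michael Jordan born?"),
    ],
    stream=False,
)
```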