mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00
fixes tests & move braintrust api_keys to request headers (#535)
# What does this PR do?
- braintrust scoring provider requires OPENAI_API_KEY env variable to be
set
- move this to be able to be set as request headers (e.g. like together
/ fireworks api keys)
- fixes pytest with agents dependency
## Test Plan
**E2E**
```
llama stack run
```
```yaml
scoring:
- provider_id: braintrust-0
provider_type: inline::braintrust
config: {}
```
**Client**
```python
self.client = LlamaStackClient(
base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:5000"),
provider_data={
"openai_api_key": os.environ.get("OPENAI_API_KEY", ""),
},
)
```
- run `llama-stack-client eval run_scoring`
**Unit Test**
```
pytest -v -s -m meta_reference_eval_together_inference eval/test_eval.py
```
```
pytest -v -s -m braintrust_scoring_together_inference scoring/test_scoring.py --env OPENAI_API_KEY=$OPENAI_API_KEY
```
<img width="745" alt="image"
src="https://github.com/user-attachments/assets/68f5cdda-f6c8-496d-8b4f-1b3dabeca9c2">
## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor
guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md),
Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
This commit is contained in:
parent
d3956a1d22
commit
50cc165077
8 changed files with 72 additions and 8 deletions
|
|
@ -6,10 +6,14 @@
|
|||
|
||||
import pytest
|
||||
|
||||
from ..agents.fixtures import AGENTS_FIXTURES
|
||||
|
||||
from ..conftest import get_provider_fixture_overrides
|
||||
|
||||
from ..datasetio.fixtures import DATASETIO_FIXTURES
|
||||
from ..inference.fixtures import INFERENCE_FIXTURES
|
||||
from ..memory.fixtures import MEMORY_FIXTURES
|
||||
from ..safety.fixtures import SAFETY_FIXTURES
|
||||
from ..scoring.fixtures import SCORING_FIXTURES
|
||||
from .fixtures import EVAL_FIXTURES
|
||||
|
||||
|
|
@ -20,6 +24,9 @@ DEFAULT_PROVIDER_COMBINATIONS = [
|
|||
"scoring": "basic",
|
||||
"datasetio": "localfs",
|
||||
"inference": "fireworks",
|
||||
"agents": "meta_reference",
|
||||
"safety": "llama_guard",
|
||||
"memory": "faiss",
|
||||
},
|
||||
id="meta_reference_eval_fireworks_inference",
|
||||
marks=pytest.mark.meta_reference_eval_fireworks_inference,
|
||||
|
|
@ -30,6 +37,9 @@ DEFAULT_PROVIDER_COMBINATIONS = [
|
|||
"scoring": "basic",
|
||||
"datasetio": "localfs",
|
||||
"inference": "together",
|
||||
"agents": "meta_reference",
|
||||
"safety": "llama_guard",
|
||||
"memory": "faiss",
|
||||
},
|
||||
id="meta_reference_eval_together_inference",
|
||||
marks=pytest.mark.meta_reference_eval_together_inference,
|
||||
|
|
@ -40,6 +50,9 @@ DEFAULT_PROVIDER_COMBINATIONS = [
|
|||
"scoring": "basic",
|
||||
"datasetio": "huggingface",
|
||||
"inference": "together",
|
||||
"agents": "meta_reference",
|
||||
"safety": "llama_guard",
|
||||
"memory": "faiss",
|
||||
},
|
||||
id="meta_reference_eval_together_inference_huggingface_datasetio",
|
||||
marks=pytest.mark.meta_reference_eval_together_inference_huggingface_datasetio,
|
||||
|
|
@ -75,6 +88,9 @@ def pytest_generate_tests(metafunc):
|
|||
"scoring": SCORING_FIXTURES,
|
||||
"datasetio": DATASETIO_FIXTURES,
|
||||
"inference": INFERENCE_FIXTURES,
|
||||
"agents": AGENTS_FIXTURES,
|
||||
"safety": SAFETY_FIXTURES,
|
||||
"memory": MEMORY_FIXTURES,
|
||||
}
|
||||
combinations = (
|
||||
get_provider_fixture_overrides(metafunc.config, available_fixtures)
|
||||
|
|
|
|||
|
|
@ -40,14 +40,30 @@ async def eval_stack(request):
|
|||
|
||||
providers = {}
|
||||
provider_data = {}
|
||||
for key in ["datasetio", "eval", "scoring", "inference"]:
|
||||
for key in [
|
||||
"datasetio",
|
||||
"eval",
|
||||
"scoring",
|
||||
"inference",
|
||||
"agents",
|
||||
"safety",
|
||||
"memory",
|
||||
]:
|
||||
fixture = request.getfixturevalue(f"{key}_{fixture_dict[key]}")
|
||||
providers[key] = fixture.providers
|
||||
if fixture.provider_data:
|
||||
provider_data.update(fixture.provider_data)
|
||||
|
||||
test_stack = await construct_stack_for_test(
|
||||
[Api.eval, Api.datasetio, Api.inference, Api.scoring],
|
||||
[
|
||||
Api.eval,
|
||||
Api.datasetio,
|
||||
Api.inference,
|
||||
Api.scoring,
|
||||
Api.agents,
|
||||
Api.safety,
|
||||
Api.memory,
|
||||
],
|
||||
providers,
|
||||
provider_data,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -10,9 +10,10 @@ import pytest_asyncio
|
|||
from llama_stack.apis.models import ModelInput
|
||||
|
||||
from llama_stack.distribution.datatypes import Api, Provider
|
||||
|
||||
from llama_stack.providers.inline.scoring.braintrust import BraintrustScoringConfig
|
||||
from llama_stack.providers.tests.resolver import construct_stack_for_test
|
||||
from ..conftest import ProviderFixture, remote_stack_fixture
|
||||
from ..env import get_env_or_fail
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
|
|
@ -40,7 +41,9 @@ def scoring_braintrust() -> ProviderFixture:
|
|||
Provider(
|
||||
provider_id="braintrust",
|
||||
provider_type="inline::braintrust",
|
||||
config={},
|
||||
config=BraintrustScoringConfig(
|
||||
openai_api_key=get_env_or_fail("OPENAI_API_KEY"),
|
||||
).model_dump(),
|
||||
)
|
||||
],
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue