forked from phoenix-oss/llama-stack-mirror
		
	# What does this PR do?
- The braintrust scoring provider currently requires the `OPENAI_API_KEY`
environment variable to be set.
- Allow the key to be supplied through request headers instead (the same
way the Together / Fireworks API keys are passed).
- Fixes pytest with the agents dependency.
## Test Plan
**E2E**
```
llama stack run 
```
```yaml
scoring:
  - provider_id: braintrust-0
    provider_type: inline::braintrust
    config: {}
```
**Client**
```python
self.client = LlamaStackClient(
    base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:5000"),
    provider_data={
        "openai_api_key": os.environ.get("OPENAI_API_KEY", ""),
    },
)
```
- run `llama-stack-client eval run_scoring`
**Unit Test**
```
pytest -v -s -m meta_reference_eval_together_inference eval/test_eval.py
```
```
pytest -v -s -m braintrust_scoring_together_inference scoring/test_scoring.py --env OPENAI_API_KEY=$OPENAI_API_KEY
```
<img width="745" alt="image"
src="https://github.com/user-attachments/assets/68f5cdda-f6c8-496d-8b4f-1b3dabeca9c2">
## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor
guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md),
      Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
		
	
			
		
			
				
	
	
		
			99 lines
		
	
	
	
		
			3.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			99 lines
		
	
	
	
		
			3.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # Copyright (c) Meta Platforms, Inc. and affiliates.
 | |
| # All rights reserved.
 | |
| #
 | |
| # This source code is licensed under the terms described in the LICENSE file in
 | |
| # the root directory of this source tree.
 | |
| 
 | |
| import pytest
 | |
| 
 | |
| from ..agents.fixtures import AGENTS_FIXTURES
 | |
| 
 | |
| from ..conftest import get_provider_fixture_overrides
 | |
| 
 | |
| from ..datasetio.fixtures import DATASETIO_FIXTURES
 | |
| from ..inference.fixtures import INFERENCE_FIXTURES
 | |
| from ..memory.fixtures import MEMORY_FIXTURES
 | |
| from ..safety.fixtures import SAFETY_FIXTURES
 | |
| from ..scoring.fixtures import SCORING_FIXTURES
 | |
| from .fixtures import EVAL_FIXTURES
 | |
| 
 | |
def _eval_stack_param(inference, datasetio, test_id):
    """Build one ``pytest.param`` describing an eval-stack provider combination.

    Only the inference provider, the datasetio provider and the test id vary
    between the default combinations; every other API is pinned to a fixed
    provider. The pytest mark attached to the param has the same name as
    ``test_id``.
    """
    providers = {
        "eval": "meta_reference",
        "scoring": "basic",
        "datasetio": datasetio,
        "inference": inference,
        "agents": "meta_reference",
        "safety": "llama_guard",
        "memory": "faiss",
    }
    return pytest.param(providers, id=test_id, marks=getattr(pytest.mark, test_id))


# Provider combinations used to parametrize ``eval_stack`` when
# ``get_provider_fixture_overrides`` yields nothing (see
# ``pytest_generate_tests``).
DEFAULT_PROVIDER_COMBINATIONS = [
    _eval_stack_param(
        inference="fireworks",
        datasetio="localfs",
        test_id="meta_reference_eval_fireworks_inference",
    ),
    _eval_stack_param(
        inference="together",
        datasetio="localfs",
        test_id="meta_reference_eval_together_inference",
    ),
    _eval_stack_param(
        inference="together",
        datasetio="huggingface",
        test_id="meta_reference_eval_together_inference_huggingface_datasetio",
    ),
]
 | |
| 
 | |
| 
 | |
def pytest_configure(config):
    """Register the eval provider-combination markers with pytest.

    For each marker name, one line of the form
    ``"<name>: marks tests as <name> specific"`` is appended to the
    ``markers`` ini setting through ``config.addinivalue_line``.
    """
    marker_names = (
        "meta_reference_eval_fireworks_inference",
        "meta_reference_eval_together_inference",
        "meta_reference_eval_together_inference_huggingface_datasetio",
    )
    for marker in marker_names:
        description = f"{marker}: marks tests as {marker} specific"
        config.addinivalue_line("markers", description)
 | |
| 
 | |
| 
 | |
def pytest_addoption(parser):
    """Add the ``--inference-model`` command-line option.

    The option stores its value and defaults to
    ``meta-llama/Llama-3.2-3B-Instruct``.
    """
    option_settings = {
        "action": "store",
        "default": "meta-llama/Llama-3.2-3B-Instruct",
        "help": "Specify the inference model to use for testing",
    }
    parser.addoption("--inference-model", **option_settings)
 | |
| 
 | |
| 
 | |
def pytest_generate_tests(metafunc):
    """Parametrize the ``eval_stack`` fixture with provider combinations.

    Tests that do not request ``eval_stack`` are left untouched. Otherwise
    the combinations come from ``get_provider_fixture_overrides`` (called
    with the pytest config and the per-API fixture tables); when that result
    is falsy, ``DEFAULT_PROVIDER_COMBINATIONS`` is used instead. The
    parametrization is indirect, so each combination is handed to the
    ``eval_stack`` fixture rather than to the test function.
    """
    if "eval_stack" not in metafunc.fixturenames:
        return

    fixtures_by_api = {
        "eval": EVAL_FIXTURES,
        "scoring": SCORING_FIXTURES,
        "datasetio": DATASETIO_FIXTURES,
        "inference": INFERENCE_FIXTURES,
        "agents": AGENTS_FIXTURES,
        "safety": SAFETY_FIXTURES,
        "memory": MEMORY_FIXTURES,
    }
    overrides = get_provider_fixture_overrides(metafunc.config, fixtures_by_api)
    combinations = overrides if overrides else DEFAULT_PROVIDER_COMBINATIONS
    metafunc.parametrize("eval_stack", combinations, indirect=True)
 |