mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-08-06 10:42:39 +00:00

pre

parent d1b44c1251
commit 443b18a992

5 changed files with 7 additions and 127 deletions
docs/_static/llama-stack-spec.html (vendored), 67 changed lines
@@ -7681,73 +7681,6 @@
         "title": "EvaluationResponse",
         "description": "A response to an inline evaluation."
       },
-      "ScoringResult": {
-        "type": "object",
-        "properties": {
-          "scores": {
-            "type": "array",
-            "items": {
-              "type": "object",
-              "additionalProperties": {
-                "oneOf": [
-                  {
-                    "type": "null"
-                  },
-                  {
-                    "type": "boolean"
-                  },
-                  {
-                    "type": "number"
-                  },
-                  {
-                    "type": "string"
-                  },
-                  {
-                    "type": "array"
-                  },
-                  {
-                    "type": "object"
-                  }
-                ]
-              }
-            },
-            "description": "The scoring result for each row. Each row is a map of grader column name to value."
-          },
-          "metrics": {
-            "type": "object",
-            "additionalProperties": {
-              "oneOf": [
-                {
-                  "type": "null"
-                },
-                {
-                  "type": "boolean"
-                },
-                {
-                  "type": "number"
-                },
-                {
-                  "type": "string"
-                },
-                {
-                  "type": "array"
-                },
-                {
-                  "type": "object"
-                }
-              ]
-            },
-            "description": "Map of metric name to aggregated value."
-          }
-        },
-        "additionalProperties": false,
-        "required": [
-          "scores",
-          "metrics"
-        ],
-        "title": "ScoringResult",
-        "description": "A scoring result for a single row."
-      },
       "HealthInfo": {
         "type": "object",
         "properties": {
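Everything this hunk removes is the ScoringResult schema itself. For readers who want to see the shape it enforced, here is a minimal sketch that validates a made-up payload against the deleted fragment, using the third-party jsonschema package (the payload field values are invented for illustration):

    import jsonschema  # third-party: pip install jsonschema

    # The "ScoringResult" fragment deleted above, transcribed as a Python dict.
    value_types = {"oneOf": [{"type": t} for t in ("null", "boolean", "number", "string", "array", "object")]}
    scoring_result_schema = {
        "type": "object",
        "properties": {
            "scores": {"type": "array", "items": {"type": "object", "additionalProperties": value_types}},
            "metrics": {"type": "object", "additionalProperties": value_types},
        },
        "additionalProperties": False,
        "required": ["scores", "metrics"],
    }

    # Illustrative payload: one scores map per row, plus aggregated metrics.
    # Raises jsonschema.ValidationError if the shape does not match.
    jsonschema.validate(
        instance={"scores": [{"accuracy": 1.0, "feedback": "correct"}], "metrics": {"accuracy": 1.0}},
        schema=scoring_result_schema,
    )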
docs/_static/llama-stack-spec.yaml (vendored), 35 changed lines
@@ -5359,41 +5359,6 @@ components:
       - grades
     title: EvaluationResponse
     description: A response to an inline evaluation.
-    ScoringResult:
-      type: object
-      properties:
-        scores:
-          type: array
-          items:
-            type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: >-
-            The scoring result for each row. Each row is a map of grader column name
-            to value.
-        metrics:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Map of metric name to aggregated value.
-      additionalProperties: false
-      required:
-        - scores
-        - metrics
-      title: ScoringResult
-      description: A scoring result for a single row.
     HealthInfo:
       type: object
       properties:
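The YAML removal mirrors the JSON one. Per the descriptions above, "scores" carries one grader-column-to-value map per row while "metrics" carries dataset-level aggregates; a hypothetical instance (grader and metric names invented for illustration):

    # Hypothetical ScoringResult instance matching the removed schema:
    # "scores" holds one {grader column name -> value} map per evaluated row,
    # "metrics" holds one aggregated value per metric name.
    scoring_result = {
        "scores": [
            {"accuracy": 1.0, "judge_notes": "exact match"},
            {"accuracy": 0.0, "judge_notes": "wrong unit"},
        ],
        "metrics": {"accuracy": 0.5, "num_rows": 2},
    }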
@@ -51,14 +51,8 @@ def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]:
 
 
 def providable_apis() -> List[Api]:
-    routing_table_apis = {
-        x.routing_table_api for x in builtin_automatically_routed_apis()
-    }
-    return [
-        api
-        for api in Api
-        if api not in routing_table_apis and api not in [Api.inspect, Api.providers]
-    ]
+    routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()}
+    return [api for api in Api if api not in routing_table_apis and api not in [Api.inspect, Api.providers]]
 
 
 def get_provider_registry() -> Dict[Api, Dict[str, ProviderSpec]]:
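The hunk above only collapses two comprehensions onto single lines; behavior is unchanged. As a self-contained illustration of the filtering pattern, here is a toy sketch with a stand-in Api enum (the real Api, AutoRoutedApiInfo, and builtin_automatically_routed_apis live in llama_stack and are richer than this):

    from enum import Enum

    class Api(Enum):
        # Toy stand-in: the real enum in llama_stack has many more members.
        inference = "inference"
        models = "models"
        inspect = "inspect"
        providers = "providers"

    # Pretend "models" is the auto-routed routing-table API.
    routing_table_apis = {Api.models}

    providable = [api for api in Api if api not in routing_table_apis and api not in [Api.inspect, Api.providers]]
    print(providable)  # -> [<Api.inference: 'inference'>]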
@@ -22,9 +22,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
         purpose="eval/messages-answer",
         source={
             "type": "uri",
-            "uri": data_url_from_file(
-                Path(__file__).parent.parent / "datasets" / "test_dataset.csv"
-            ),
+            "uri": data_url_from_file(Path(__file__).parent.parent / "datasets" / "test_dataset.csv"),
         },
     )
     response = llama_stack_client.datasets.list()

@@ -74,9 +72,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
         purpose="eval/messages-answer",
         source={
             "type": "uri",
-            "uri": data_url_from_file(
-                Path(__file__).parent.parent / "datasets" / "test_dataset.csv"
-            ),
+            "uri": data_url_from_file(Path(__file__).parent.parent / "datasets" / "test_dataset.csv"),
         },
     )
     benchmark_id = str(uuid.uuid4())

@@ -99,14 +95,10 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
         },
     )
     assert response.job_id == "0"
-    job_status = llama_stack_client.eval.jobs.status(
-        job_id=response.job_id, benchmark_id=benchmark_id
-    )
+    job_status = llama_stack_client.eval.jobs.status(job_id=response.job_id, benchmark_id=benchmark_id)
     assert job_status and job_status == "completed"
 
-    eval_response = llama_stack_client.eval.jobs.retrieve(
-        job_id=response.job_id, benchmark_id=benchmark_id
-    )
+    eval_response = llama_stack_client.eval.jobs.retrieve(job_id=response.job_id, benchmark_id=benchmark_id)
     assert eval_response is not None
     assert len(eval_response.generations) == 5
     assert scoring_fn_id in eval_response.scores
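These test hunks likewise only join wrapped calls onto single lines. The data_url_from_file helper they use comes from the repo's test utilities; assuming it follows the usual data-URL recipe, it reads the file, base64-encodes it, and prefixes the MIME type, roughly:

    import base64
    import mimetypes
    from pathlib import Path

    def data_url_from_file(file_path: Path) -> str:
        # Sketch of the helper, not the repo's exact implementation: embed the
        # file contents as a base64 "data:" URL so no file server is needed.
        data = file_path.read_bytes()
        mime_type, _ = mimetypes.guess_type(str(file_path))
        encoded = base64.b64encode(data).decode("utf-8")
        return f"data:{mime_type};base64,{encoded}"

    # e.g. data_url_from_file(Path("test_dataset.csv")) -> "data:text/csv;base64,..."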
@@ -154,11 +154,7 @@ def test_scoring_score_with_aggregation_functions(
     df = pd.read_csv(Path(__file__).parent.parent / "datasets" / "test_dataset.csv")
     rows = df.to_dict(orient="records")
 
-    scoring_fns_list = [
-        x
-        for x in llama_stack_client.scoring_functions.list()
-        if x.provider_id == provider_id
-    ]
+    scoring_fns_list = [x for x in llama_stack_client.scoring_functions.list() if x.provider_id == provider_id]
     if len(scoring_fns_list) == 0:
         pytest.skip(f"No scoring functions found for provider {provider_id}, skipping")
 