Merge branch 'main' into feat/gunicorn-production-server

This commit is contained in:
Roy Belio 2025-11-04 15:57:41 +02:00 committed by GitHub
commit 9ff881a28a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 439 additions and 11392 deletions

View file

@ -977,11 +977,11 @@ paths:
get:
responses:
'200':
description: A ListModelsResponse.
description: A OpenAIListModelsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/ListModelsResponse'
$ref: '#/components/schemas/OpenAIListModelsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@ -994,8 +994,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: List all models.
description: List all models.
summary: List models using the OpenAI API.
description: List models using the OpenAI API.
parameters: []
deprecated: false
post:
@ -1129,31 +1129,6 @@ paths:
$ref: '#/components/schemas/RunModerationRequest'
required: true
deprecated: false
/v1/openai/v1/models:
get:
responses:
'200':
description: A OpenAIListModelsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIListModelsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: List models using the OpenAI API.
description: List models using the OpenAI API.
parameters: []
deprecated: false
/v1/prompts:
get:
responses:
@ -6823,6 +6798,88 @@ components:
title: ListRoutesResponse
description: >-
Response containing a list of all available API routes.
OpenAIModel:
type: object
properties:
id:
type: string
object:
type: string
const: model
default: model
created:
type: integer
owned_by:
type: string
custom_metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
- object
- created
- owned_by
title: OpenAIModel
description: A model from OpenAI.
OpenAIListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIModel'
additionalProperties: false
required:
- data
title: OpenAIListModelsResponse
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
Model:
type: object
properties:
@ -6880,57 +6937,6 @@ components:
title: Model
description: >-
A model resource representing an AI model registered in Llama Stack.
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
ListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Model'
additionalProperties: false
required:
- data
title: ListModelsResponse
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
RunModerationRequest:
type: object
properties:
@ -7020,48 +7026,6 @@ components:
- metadata
title: ModerationObjectResults
description: A moderation object.
OpenAIModel:
type: object
properties:
id:
type: string
object:
type: string
const: model
default: model
created:
type: integer
owned_by:
type: string
custom_metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
- object
- created
- owned_by
title: OpenAIModel
description: A model from OpenAI.
OpenAIListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIModel'
additionalProperties: false
required:
- data
title: OpenAIListModelsResponse
Prompt:
type: object
properties:

File diff suppressed because it is too large Load diff

View file

@ -974,11 +974,11 @@ paths:
get:
responses:
'200':
description: A ListModelsResponse.
description: A OpenAIListModelsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/ListModelsResponse'
$ref: '#/components/schemas/OpenAIListModelsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@ -991,8 +991,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: List all models.
description: List all models.
summary: List models using the OpenAI API.
description: List models using the OpenAI API.
parameters: []
deprecated: false
post:
@ -1126,31 +1126,6 @@ paths:
$ref: '#/components/schemas/RunModerationRequest'
required: true
deprecated: false
/v1/openai/v1/models:
get:
responses:
'200':
description: A OpenAIListModelsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIListModelsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: List models using the OpenAI API.
description: List models using the OpenAI API.
parameters: []
deprecated: false
/v1/prompts:
get:
responses:
@ -5610,6 +5585,88 @@ components:
title: ListRoutesResponse
description: >-
Response containing a list of all available API routes.
OpenAIModel:
type: object
properties:
id:
type: string
object:
type: string
const: model
default: model
created:
type: integer
owned_by:
type: string
custom_metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
- object
- created
- owned_by
title: OpenAIModel
description: A model from OpenAI.
OpenAIListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIModel'
additionalProperties: false
required:
- data
title: OpenAIListModelsResponse
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
Model:
type: object
properties:
@ -5667,57 +5724,6 @@ components:
title: Model
description: >-
A model resource representing an AI model registered in Llama Stack.
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
ListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Model'
additionalProperties: false
required:
- data
title: ListModelsResponse
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
RunModerationRequest:
type: object
properties:
@ -5807,48 +5813,6 @@ components:
- metadata
title: ModerationObjectResults
description: A moderation object.
OpenAIModel:
type: object
properties:
id:
type: string
object:
type: string
const: model
default: model
created:
type: integer
owned_by:
type: string
custom_metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
- object
- created
- owned_by
title: OpenAIModel
description: A model from OpenAI.
OpenAIListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIModel'
additionalProperties: false
required:
- data
title: OpenAIListModelsResponse
Prompt:
type: object
properties:

View file

@ -977,11 +977,11 @@ paths:
get:
responses:
'200':
description: A ListModelsResponse.
description: A OpenAIListModelsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/ListModelsResponse'
$ref: '#/components/schemas/OpenAIListModelsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@ -994,8 +994,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: List all models.
description: List all models.
summary: List models using the OpenAI API.
description: List models using the OpenAI API.
parameters: []
deprecated: false
post:
@ -1129,31 +1129,6 @@ paths:
$ref: '#/components/schemas/RunModerationRequest'
required: true
deprecated: false
/v1/openai/v1/models:
get:
responses:
'200':
description: A OpenAIListModelsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIListModelsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: List models using the OpenAI API.
description: List models using the OpenAI API.
parameters: []
deprecated: false
/v1/prompts:
get:
responses:
@ -6823,6 +6798,88 @@ components:
title: ListRoutesResponse
description: >-
Response containing a list of all available API routes.
OpenAIModel:
type: object
properties:
id:
type: string
object:
type: string
const: model
default: model
created:
type: integer
owned_by:
type: string
custom_metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
- object
- created
- owned_by
title: OpenAIModel
description: A model from OpenAI.
OpenAIListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIModel'
additionalProperties: false
required:
- data
title: OpenAIListModelsResponse
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
Model:
type: object
properties:
@ -6880,57 +6937,6 @@ components:
title: Model
description: >-
A model resource representing an AI model registered in Llama Stack.
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
ListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Model'
additionalProperties: false
required:
- data
title: ListModelsResponse
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
RunModerationRequest:
type: object
properties:
@ -7020,48 +7026,6 @@ components:
- metadata
title: ModerationObjectResults
description: A moderation object.
OpenAIModel:
type: object
properties:
id:
type: string
object:
type: string
const: model
default: model
created:
type: integer
owned_by:
type: string
custom_metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
- object
- created
- owned_by
title: OpenAIModel
description: A model from OpenAI.
OpenAIListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIModel'
additionalProperties: false
required:
- data
title: OpenAIListModelsResponse
Prompt:
type: object
properties:

View file

@ -491,13 +491,6 @@ class Agents(Protocol):
APIs for creating and interacting with agentic systems."""
@webmethod(
route="/agents",
method="POST",
descriptive_name="create_agent",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/agents",
method="POST",
@ -515,13 +508,6 @@ class Agents(Protocol):
"""
...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn",
method="POST",
descriptive_name="create_agent_turn",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn",
method="POST",
@ -552,13 +538,6 @@ class Agents(Protocol):
"""
...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
method="POST",
descriptive_name="resume_agent_turn",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
method="POST",
@ -586,12 +565,6 @@ class Agents(Protocol):
"""
...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
method="GET",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
method="GET",
@ -612,12 +585,6 @@ class Agents(Protocol):
"""
...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
method="GET",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
method="GET",
@ -640,13 +607,6 @@ class Agents(Protocol):
"""
...
@webmethod(
route="/agents/{agent_id}/session",
method="POST",
descriptive_name="create_agent_session",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/agents/{agent_id}/session",
method="POST",
@ -666,12 +626,6 @@ class Agents(Protocol):
"""
...
@webmethod(
route="/agents/{agent_id}/session/{session_id}",
method="GET",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/agents/{agent_id}/session/{session_id}",
method="GET",
@ -692,12 +646,6 @@ class Agents(Protocol):
"""
...
@webmethod(
route="/agents/{agent_id}/session/{session_id}",
method="DELETE",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/agents/{agent_id}/session/{session_id}",
method="DELETE",
@ -715,12 +663,6 @@ class Agents(Protocol):
"""
...
@webmethod(
route="/agents/{agent_id}",
method="DELETE",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
async def delete_agent(
self,
@ -732,7 +674,6 @@ class Agents(Protocol):
"""
...
@webmethod(route="/agents", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
"""List all agents.
@ -743,12 +684,6 @@ class Agents(Protocol):
"""
...
@webmethod(
route="/agents/{agent_id}",
method="GET",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_agent(self, agent_id: str) -> Agent:
"""Describe an agent by its ID.
@ -758,12 +693,6 @@ class Agents(Protocol):
"""
...
@webmethod(
route="/agents/{agent_id}/sessions",
method="GET",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def list_agent_sessions(
self,
@ -787,12 +716,6 @@ class Agents(Protocol):
#
# Both of these APIs are inherently stateful.
@webmethod(
route="/openai/v1/responses/{response_id}",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_openai_response(
self,
@ -805,7 +728,6 @@ class Agents(Protocol):
"""
...
@webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
async def create_openai_response(
self,
@ -842,7 +764,6 @@ class Agents(Protocol):
"""
...
@webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_responses(
self,
@ -861,9 +782,6 @@ class Agents(Protocol):
"""
...
@webmethod(
route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_response_input_items(
self,
@ -886,7 +804,6 @@ class Agents(Protocol):
"""
...
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
"""Delete a response.

View file

@ -43,7 +43,6 @@ class Batches(Protocol):
Note: This API is currently under active development and may undergo changes.
"""
@webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1)
async def create_batch(
self,
@ -64,7 +63,6 @@ class Batches(Protocol):
"""
...
@webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
async def retrieve_batch(self, batch_id: str) -> BatchObject:
"""Retrieve information about a specific batch.
@ -74,7 +72,6 @@ class Batches(Protocol):
"""
...
@webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
async def cancel_batch(self, batch_id: str) -> BatchObject:
"""Cancel a batch that is in progress.
@ -84,7 +81,6 @@ class Batches(Protocol):
"""
...
@webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1)
async def list_batches(
self,

View file

@ -8,7 +8,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, Field
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, webmethod
@ -54,7 +54,6 @@ class ListBenchmarksResponse(BaseModel):
@runtime_checkable
class Benchmarks(Protocol):
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def list_benchmarks(self) -> ListBenchmarksResponse:
"""List all benchmarks.
@ -63,7 +62,6 @@ class Benchmarks(Protocol):
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_benchmark(
self,
@ -76,7 +74,6 @@ class Benchmarks(Protocol):
"""
...
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def register_benchmark(
self,
@ -98,7 +95,6 @@ class Benchmarks(Protocol):
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
async def unregister_benchmark(self, benchmark_id: str) -> None:
"""Unregister a benchmark.

View file

@ -8,7 +8,7 @@ from typing import Any, Protocol, runtime_checkable
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.datasets import Dataset
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA
from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
from llama_stack.schema_utils import webmethod
@ -21,7 +21,6 @@ class DatasetIO(Protocol):
# keeping for aligning with inference/safety, but this is not used
dataset_store: DatasetStore
@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
async def iterrows(
self,
@ -46,9 +45,6 @@ class DatasetIO(Protocol):
"""
...
@webmethod(
route="/datasetio/append-rows/{dataset_id:path}", method="POST", deprecated=True, level=LLAMA_STACK_API_V1
)
@webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1BETA)
async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
"""Append rows to a dataset.

View file

@ -10,7 +10,7 @@ from typing import Annotated, Any, Literal, Protocol
from pydantic import BaseModel, Field
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA
from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -146,7 +146,6 @@ class ListDatasetsResponse(BaseModel):
class Datasets(Protocol):
@webmethod(route="/datasets", method="POST", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA)
async def register_dataset(
self,
@ -216,7 +215,6 @@ class Datasets(Protocol):
"""
...
@webmethod(route="/datasets/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
async def get_dataset(
self,
@ -229,7 +227,6 @@ class Datasets(Protocol):
"""
...
@webmethod(route="/datasets", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1BETA)
async def list_datasets(self) -> ListDatasetsResponse:
"""List all datasets.
@ -238,7 +235,6 @@ class Datasets(Protocol):
"""
...
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA)
async def unregister_dataset(
self,

View file

@ -13,7 +13,7 @@ from llama_stack.apis.common.job_types import Job
from llama_stack.apis.inference import SamplingParams, SystemMessage
from llama_stack.apis.scoring import ScoringResult
from llama_stack.apis.scoring_functions import ScoringFnParams
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -86,7 +86,6 @@ class Eval(Protocol):
Llama Stack Evaluation API for running evaluations on model and agent candidates."""
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def run_eval(
self,
@ -101,9 +100,6 @@ class Eval(Protocol):
"""
...
@webmethod(
route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def evaluate_rows(
self,
@ -122,9 +118,6 @@ class Eval(Protocol):
"""
...
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def job_status(self, benchmark_id: str, job_id: str) -> Job:
"""Get the status of a job.
@ -135,12 +128,6 @@ class Eval(Protocol):
"""
...
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
method="DELETE",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
"""Cancel a job.
@ -150,12 +137,6 @@ class Eval(Protocol):
"""
...
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
)

View file

@ -110,7 +110,6 @@ class Files(Protocol):
"""
# OpenAI Files API Endpoints
@webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
async def openai_upload_file(
self,
@ -134,7 +133,6 @@ class Files(Protocol):
"""
...
@webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_files(
self,
@ -155,7 +153,6 @@ class Files(Protocol):
"""
...
@webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_file(
self,
@ -170,7 +167,6 @@ class Files(Protocol):
"""
...
@webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def openai_delete_file(
self,
@ -183,7 +179,6 @@ class Files(Protocol):
"""
...
@webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_file_content(
self,

View file

@ -1189,7 +1189,6 @@ class InferenceProvider(Protocol):
raise NotImplementedError("Reranking is not implemented")
return # this is so mypy's safe-super rule will consider the method concrete
@webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
async def openai_completion(
self,
@ -1202,7 +1201,6 @@ class InferenceProvider(Protocol):
"""
...
@webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
async def openai_chat_completion(
self,
@ -1215,7 +1213,6 @@ class InferenceProvider(Protocol):
"""
...
@webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
async def openai_embeddings(
self,
@ -1240,7 +1237,6 @@ class Inference(InferenceProvider):
- Rerank models: these models reorder the documents based on their relevance to a query.
"""
@webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
async def list_chat_completions(
self,
@ -1259,9 +1255,6 @@ class Inference(InferenceProvider):
"""
raise NotImplementedError("List chat completions is not implemented")
@webmethod(
route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
"""Get chat completion.

View file

@ -107,7 +107,6 @@ class OpenAIListModelsResponse(BaseModel):
@runtime_checkable
@trace_protocol
class Models(Protocol):
@webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
async def list_models(self) -> ListModelsResponse:
"""List all models.
@ -115,7 +114,7 @@ class Models(Protocol):
"""
...
@webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1)
@webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_models(self) -> OpenAIListModelsResponse:
"""List models using the OpenAI API.

View file

@ -13,7 +13,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.common.content_types import URL
from llama_stack.apis.common.job_types import JobStatus
from llama_stack.apis.common.training_types import Checkpoint
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -284,7 +284,6 @@ class PostTrainingJobArtifactsResponse(BaseModel):
class PostTraining(Protocol):
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def supervised_fine_tune(
self,
@ -312,7 +311,6 @@ class PostTraining(Protocol):
"""
...
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def preference_optimize(
self,
@ -335,7 +333,6 @@ class PostTraining(Protocol):
"""
...
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
"""Get all training jobs.
@ -344,7 +341,6 @@ class PostTraining(Protocol):
"""
...
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse:
"""Get the status of a training job.
@ -354,7 +350,6 @@ class PostTraining(Protocol):
"""
...
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def cancel_training_job(self, job_uuid: str) -> None:
"""Cancel a training job.
@ -363,7 +358,6 @@ class PostTraining(Protocol):
"""
...
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
"""Get the artifacts of a training job.

View file

@ -121,7 +121,6 @@ class Safety(Protocol):
"""
...
@webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
"""Create moderation.

View file

@ -545,7 +545,6 @@ class VectorIO(Protocol):
...
# OpenAI Vector Stores API endpoints
@webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
async def openai_create_vector_store(
self,
@ -558,7 +557,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_vector_stores(
self,
@ -577,9 +575,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_vector_store(
self,
@ -592,9 +587,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(
route="/vector_stores/{vector_store_id}",
method="POST",
@ -617,9 +609,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(
route="/vector_stores/{vector_store_id}",
method="DELETE",
@ -636,12 +625,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/search",
method="POST",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(
route="/vector_stores/{vector_store_id}/search",
method="POST",
@ -674,12 +657,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files",
method="POST",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(
route="/vector_stores/{vector_store_id}/files",
method="POST",
@ -702,12 +679,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(
route="/vector_stores/{vector_store_id}/files",
method="GET",
@ -734,12 +705,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(
route="/vector_stores/{vector_store_id}/files/{file_id}",
method="GET",
@ -758,12 +723,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(
route="/vector_stores/{vector_store_id}/files/{file_id}/content",
method="GET",
@ -782,12 +741,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
method="POST",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(
route="/vector_stores/{vector_store_id}/files/{file_id}",
method="POST",
@ -808,12 +761,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
method="DELETE",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(
route="/vector_stores/{vector_store_id}/files/{file_id}",
method="DELETE",
@ -837,12 +784,6 @@ class VectorIO(Protocol):
method="POST",
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/file_batches",
method="POST",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
async def openai_create_vector_store_file_batch(
self,
vector_store_id: str,
@ -861,12 +802,6 @@ class VectorIO(Protocol):
method="GET",
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
async def openai_retrieve_vector_store_file_batch(
self,
batch_id: str,
@ -880,12 +815,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(
route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
method="GET",
@ -914,12 +843,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
method="POST",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(
route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
method="POST",

View file

@ -5,7 +5,6 @@
# the root directory of this source tree.
import os
import unittest
from unittest.mock import MagicMock, patch
import pytest
@ -13,6 +12,8 @@ import pytest
from llama_stack.apis.benchmarks import Benchmark
from llama_stack.apis.common.job_types import Job, JobStatus
from llama_stack.apis.eval.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams
from llama_stack.apis.inference.inference import TopPSamplingStrategy
from llama_stack.apis.resource import ResourceType
from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl
@ -21,193 +22,200 @@ MOCK_DATASET_ID = "default/test-dataset"
MOCK_BENCHMARK_ID = "test-benchmark"
class TestNVIDIAEvalImpl(unittest.TestCase):
def setUp(self):
os.environ["NVIDIA_EVALUATOR_URL"] = "http://nemo.test"
@pytest.fixture
def nvidia_eval_setup():
"""Set up the NVIDIA eval implementation with mocked dependencies."""
os.environ["NVIDIA_EVALUATOR_URL"] = "http://nemo.test"
# Create mock APIs
self.datasetio_api = MagicMock()
self.datasets_api = MagicMock()
self.scoring_api = MagicMock()
self.inference_api = MagicMock()
self.agents_api = MagicMock()
# Create mock APIs
datasetio_api = MagicMock()
datasets_api = MagicMock()
scoring_api = MagicMock()
inference_api = MagicMock()
agents_api = MagicMock()
self.config = NVIDIAEvalConfig(
evaluator_url=os.environ["NVIDIA_EVALUATOR_URL"],
)
config = NVIDIAEvalConfig(
evaluator_url=os.environ["NVIDIA_EVALUATOR_URL"],
)
self.eval_impl = NVIDIAEvalImpl(
config=self.config,
datasetio_api=self.datasetio_api,
datasets_api=self.datasets_api,
scoring_api=self.scoring_api,
inference_api=self.inference_api,
agents_api=self.agents_api,
)
eval_impl = NVIDIAEvalImpl(
config=config,
datasetio_api=datasetio_api,
datasets_api=datasets_api,
scoring_api=scoring_api,
inference_api=inference_api,
agents_api=agents_api,
)
# Mock the HTTP request methods
self.evaluator_get_patcher = patch(
"llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_get"
)
self.evaluator_post_patcher = patch(
"llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_post"
)
self.evaluator_delete_patcher = patch(
"llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_delete"
)
self.mock_evaluator_get = self.evaluator_get_patcher.start()
self.mock_evaluator_post = self.evaluator_post_patcher.start()
self.mock_evaluator_delete = self.evaluator_delete_patcher.start()
def tearDown(self):
"""Clean up after each test."""
self.evaluator_get_patcher.stop()
self.evaluator_post_patcher.stop()
self.evaluator_delete_patcher.stop()
def _assert_request_body(self, expected_json):
"""Helper method to verify request body in Evaluator POST request is correct"""
call_args = self.mock_evaluator_post.call_args
actual_json = call_args[0][1]
# Check that all expected keys contain the expected values in the actual JSON
for key, value in expected_json.items():
assert key in actual_json, f"Key '{key}' missing in actual JSON"
if isinstance(value, dict):
for nested_key, nested_value in value.items():
assert nested_key in actual_json[key], f"Nested key '{nested_key}' missing in actual JSON['{key}']"
assert actual_json[key][nested_key] == nested_value, f"Value mismatch for '{key}.{nested_key}'"
else:
assert actual_json[key] == value, f"Value mismatch for '{key}'"
@pytest.fixture(autouse=True)
def inject_fixtures(self, run_async):
self.run_async = run_async
def test_register_benchmark(self):
eval_config = {
"type": "custom",
"params": {"parallelism": 8},
"tasks": {
"qa": {
"type": "completion",
"params": {"template": {"prompt": "{{prompt}}", "max_tokens": 200}},
"dataset": {"files_url": f"hf://datasets/{MOCK_DATASET_ID}/testing/testing.jsonl"},
"metrics": {"bleu": {"type": "bleu", "params": {"references": ["{{ideal_response}}"]}}},
}
},
# Mock the HTTP request methods
with (
patch("llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_get") as mock_evaluator_get,
patch("llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_post") as mock_evaluator_post,
):
yield {
"eval_impl": eval_impl,
"mock_evaluator_get": mock_evaluator_get,
"mock_evaluator_post": mock_evaluator_post,
"datasetio_api": datasetio_api,
"datasets_api": datasets_api,
"scoring_api": scoring_api,
"inference_api": inference_api,
"agents_api": agents_api,
}
benchmark = Benchmark(
provider_id="nvidia",
type="benchmark",
identifier=MOCK_BENCHMARK_ID,
dataset_id=MOCK_DATASET_ID,
scoring_functions=["basic::equality"],
metadata=eval_config,
)
# Mock Evaluator API response
mock_evaluator_response = {"id": MOCK_BENCHMARK_ID, "status": "created"}
self.mock_evaluator_post.return_value = mock_evaluator_response
def _assert_request_body(mock_evaluator_post, expected_json):
"""Helper method to verify request body in Evaluator POST request is correct"""
call_args = mock_evaluator_post.call_args
actual_json = call_args[0][1]
# Register the benchmark
self.run_async(self.eval_impl.register_benchmark(benchmark))
# Check that all expected keys contain the expected values in the actual JSON
for key, value in expected_json.items():
assert key in actual_json, f"Key '{key}' missing in actual JSON"
# Verify the Evaluator API was called correctly
self.mock_evaluator_post.assert_called_once()
self._assert_request_body({"namespace": benchmark.provider_id, "name": benchmark.identifier, **eval_config})
if isinstance(value, dict):
for nested_key, nested_value in value.items():
assert nested_key in actual_json[key], f"Nested key '{nested_key}' missing in actual JSON['{key}']"
assert actual_json[key][nested_key] == nested_value, f"Value mismatch for '{key}.{nested_key}'"
else:
assert actual_json[key] == value, f"Value mismatch for '{key}'"
def test_unregister_benchmark(self):
# Unregister the benchmark
self.run_async(self.eval_impl.unregister_benchmark(benchmark_id=MOCK_BENCHMARK_ID))
# Verify the Evaluator API was called correctly
self.mock_evaluator_delete.assert_called_once_with(f"/v1/evaluation/configs/nvidia/{MOCK_BENCHMARK_ID}")
async def test_register_benchmark(nvidia_eval_setup):
eval_impl = nvidia_eval_setup["eval_impl"]
mock_evaluator_post = nvidia_eval_setup["mock_evaluator_post"]
def test_run_eval(self):
benchmark_config = BenchmarkConfig(
eval_candidate=ModelCandidate(
type="model",
model=CoreModelId.llama3_1_8b_instruct.value,
sampling_params=SamplingParams(max_tokens=100, temperature=0.7),
)
)
# Mock Evaluator API response
mock_evaluator_response = {"id": "job-123", "status": "created"}
self.mock_evaluator_post.return_value = mock_evaluator_response
# Run the Evaluation job
result = self.run_async(
self.eval_impl.run_eval(benchmark_id=MOCK_BENCHMARK_ID, benchmark_config=benchmark_config)
)
# Verify the Evaluator API was called correctly
self.mock_evaluator_post.assert_called_once()
self._assert_request_body(
{
"config": f"nvidia/{MOCK_BENCHMARK_ID}",
"target": {"type": "model", "model": "Llama3.1-8B-Instruct"},
eval_config = {
"type": "custom",
"params": {"parallelism": 8},
"tasks": {
"qa": {
"type": "completion",
"params": {"template": {"prompt": "{{prompt}}", "max_tokens": 200}},
"dataset": {"files_url": f"hf://datasets/{MOCK_DATASET_ID}/testing/testing.jsonl"},
"metrics": {"bleu": {"type": "bleu", "params": {"references": ["{{ideal_response}}"]}}},
}
},
}
benchmark = Benchmark(
provider_id="nvidia",
type=ResourceType.benchmark,
identifier=MOCK_BENCHMARK_ID,
dataset_id=MOCK_DATASET_ID,
scoring_functions=["basic::equality"],
metadata=eval_config,
)
# Mock Evaluator API response
mock_evaluator_response = {"id": MOCK_BENCHMARK_ID, "status": "created"}
mock_evaluator_post.return_value = mock_evaluator_response
# Register the benchmark
await eval_impl.register_benchmark(benchmark)
# Verify the Evaluator API was called correctly
mock_evaluator_post.assert_called_once()
_assert_request_body(
mock_evaluator_post, {"namespace": benchmark.provider_id, "name": benchmark.identifier, **eval_config}
)
async def test_run_eval(nvidia_eval_setup):
eval_impl = nvidia_eval_setup["eval_impl"]
mock_evaluator_post = nvidia_eval_setup["mock_evaluator_post"]
benchmark_config = BenchmarkConfig(
eval_candidate=ModelCandidate(
type="model",
model=CoreModelId.llama3_1_8b_instruct.value,
sampling_params=SamplingParams(max_tokens=100, strategy=TopPSamplingStrategy(temperature=0.7)),
)
)
# Verify the result
assert isinstance(result, Job)
assert result.job_id == "job-123"
assert result.status == JobStatus.in_progress
# Mock Evaluator API response
mock_evaluator_response = {"id": "job-123", "status": "created"}
mock_evaluator_post.return_value = mock_evaluator_response
def test_job_status(self):
# Mock Evaluator API response
mock_evaluator_response = {"id": "job-123", "status": "completed"}
self.mock_evaluator_get.return_value = mock_evaluator_response
# Run the Evaluation job
result = await eval_impl.run_eval(benchmark_id=MOCK_BENCHMARK_ID, benchmark_config=benchmark_config)
# Get the Evaluation job
result = self.run_async(self.eval_impl.job_status(benchmark_id=MOCK_BENCHMARK_ID, job_id="job-123"))
# Verify the Evaluator API was called correctly
mock_evaluator_post.assert_called_once()
_assert_request_body(
mock_evaluator_post,
{
"config": f"nvidia/{MOCK_BENCHMARK_ID}",
"target": {"type": "model", "model": "Llama3.1-8B-Instruct"},
},
)
# Verify the result
assert isinstance(result, Job)
assert result.job_id == "job-123"
assert result.status == JobStatus.completed
# Verify the result
assert isinstance(result, Job)
assert result.job_id == "job-123"
assert result.status == JobStatus.in_progress
# Verify the API was called correctly
self.mock_evaluator_get.assert_called_once_with(f"/v1/evaluation/jobs/{result.job_id}")
def test_job_cancel(self):
# Mock Evaluator API response
mock_evaluator_response = {"id": "job-123", "status": "cancelled"}
self.mock_evaluator_post.return_value = mock_evaluator_response
async def test_job_status(nvidia_eval_setup):
eval_impl = nvidia_eval_setup["eval_impl"]
mock_evaluator_get = nvidia_eval_setup["mock_evaluator_get"]
# Cancel the Evaluation job
self.run_async(self.eval_impl.job_cancel(benchmark_id=MOCK_BENCHMARK_ID, job_id="job-123"))
# Mock Evaluator API response
mock_evaluator_response = {"id": "job-123", "status": "completed"}
mock_evaluator_get.return_value = mock_evaluator_response
# Verify the API was called correctly
self.mock_evaluator_post.assert_called_once_with("/v1/evaluation/jobs/job-123/cancel", {})
# Get the Evaluation job
result = await eval_impl.job_status(benchmark_id=MOCK_BENCHMARK_ID, job_id="job-123")
def test_job_result(self):
# Mock Evaluator API responses
mock_job_status_response = {"id": "job-123", "status": "completed"}
mock_job_results_response = {
"id": "job-123",
"status": "completed",
"results": {MOCK_BENCHMARK_ID: {"score": 0.85, "details": {"accuracy": 0.85, "f1": 0.84}}},
}
self.mock_evaluator_get.side_effect = [
mock_job_status_response, # First call to retrieve job
mock_job_results_response, # Second call to retrieve job results
]
# Verify the result
assert isinstance(result, Job)
assert result.job_id == "job-123"
assert result.status == JobStatus.completed
# Get the Evaluation job results
result = self.run_async(self.eval_impl.job_result(benchmark_id=MOCK_BENCHMARK_ID, job_id="job-123"))
# Verify the API was called correctly
mock_evaluator_get.assert_called_once_with(f"/v1/evaluation/jobs/{result.job_id}")
# Verify the result
assert isinstance(result, EvaluateResponse)
assert MOCK_BENCHMARK_ID in result.scores
assert result.scores[MOCK_BENCHMARK_ID].aggregated_results["results"][MOCK_BENCHMARK_ID]["score"] == 0.85
# Verify the API was called correctly
assert self.mock_evaluator_get.call_count == 2
self.mock_evaluator_get.assert_any_call("/v1/evaluation/jobs/job-123")
self.mock_evaluator_get.assert_any_call("/v1/evaluation/jobs/job-123/results")
async def test_job_cancel(nvidia_eval_setup):
eval_impl = nvidia_eval_setup["eval_impl"]
mock_evaluator_post = nvidia_eval_setup["mock_evaluator_post"]
# Mock Evaluator API response
mock_evaluator_response = {"id": "job-123", "status": "cancelled"}
mock_evaluator_post.return_value = mock_evaluator_response
# Cancel the Evaluation job
await eval_impl.job_cancel(benchmark_id=MOCK_BENCHMARK_ID, job_id="job-123")
# Verify the API was called correctly
mock_evaluator_post.assert_called_once_with("/v1/evaluation/jobs/job-123/cancel", {})
async def test_job_result(nvidia_eval_setup):
eval_impl = nvidia_eval_setup["eval_impl"]
mock_evaluator_get = nvidia_eval_setup["mock_evaluator_get"]
# Mock Evaluator API responses
mock_job_status_response = {"id": "job-123", "status": "completed"}
mock_job_results_response = {
"id": "job-123",
"status": "completed",
"results": {MOCK_BENCHMARK_ID: {"score": 0.85, "details": {"accuracy": 0.85, "f1": 0.84}}},
}
mock_evaluator_get.side_effect = [
mock_job_status_response, # First call to retrieve job
mock_job_results_response, # Second call to retrieve job results
]
# Get the Evaluation job results
result = await eval_impl.job_result(benchmark_id=MOCK_BENCHMARK_ID, job_id="job-123")
# Verify the result
assert isinstance(result, EvaluateResponse)
assert MOCK_BENCHMARK_ID in result.scores
assert result.scores[MOCK_BENCHMARK_ID].aggregated_results["results"][MOCK_BENCHMARK_ID]["score"] == 0.85
# Verify the API was called correctly
assert mock_evaluator_get.call_count == 2
mock_evaluator_get.assert_any_call("/v1/evaluation/jobs/job-123")
mock_evaluator_get.assert_any_call("/v1/evaluation/jobs/job-123/results")