Merge branch 'main' into feat/gunicorn-production-server

This commit is contained in:
Roy Belio 2025-11-04 15:57:41 +02:00 committed by GitHub
commit 9ff881a28a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 439 additions and 11392 deletions

View file

@ -977,11 +977,11 @@ paths:
get: get:
responses: responses:
'200': '200':
description: A ListModelsResponse. description: A OpenAIListModelsResponse.
content: content:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/ListModelsResponse' $ref: '#/components/schemas/OpenAIListModelsResponse'
'400': '400':
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
'429': '429':
@ -994,8 +994,8 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Models - Models
summary: List all models. summary: List models using the OpenAI API.
description: List all models. description: List models using the OpenAI API.
parameters: [] parameters: []
deprecated: false deprecated: false
post: post:
@ -1129,31 +1129,6 @@ paths:
$ref: '#/components/schemas/RunModerationRequest' $ref: '#/components/schemas/RunModerationRequest'
required: true required: true
deprecated: false deprecated: false
/v1/openai/v1/models:
get:
responses:
'200':
description: A OpenAIListModelsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIListModelsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: List models using the OpenAI API.
description: List models using the OpenAI API.
parameters: []
deprecated: false
/v1/prompts: /v1/prompts:
get: get:
responses: responses:
@ -6823,6 +6798,88 @@ components:
title: ListRoutesResponse title: ListRoutesResponse
description: >- description: >-
Response containing a list of all available API routes. Response containing a list of all available API routes.
OpenAIModel:
type: object
properties:
id:
type: string
object:
type: string
const: model
default: model
created:
type: integer
owned_by:
type: string
custom_metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
- object
- created
- owned_by
title: OpenAIModel
description: A model from OpenAI.
OpenAIListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIModel'
additionalProperties: false
required:
- data
title: OpenAIListModelsResponse
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
Model: Model:
type: object type: object
properties: properties:
@ -6880,57 +6937,6 @@ components:
title: Model title: Model
description: >- description: >-
A model resource representing an AI model registered in Llama Stack. A model resource representing an AI model registered in Llama Stack.
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
ListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Model'
additionalProperties: false
required:
- data
title: ListModelsResponse
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
RunModerationRequest: RunModerationRequest:
type: object type: object
properties: properties:
@ -7020,48 +7026,6 @@ components:
- metadata - metadata
title: ModerationObjectResults title: ModerationObjectResults
description: A moderation object. description: A moderation object.
OpenAIModel:
type: object
properties:
id:
type: string
object:
type: string
const: model
default: model
created:
type: integer
owned_by:
type: string
custom_metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
- object
- created
- owned_by
title: OpenAIModel
description: A model from OpenAI.
OpenAIListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIModel'
additionalProperties: false
required:
- data
title: OpenAIListModelsResponse
Prompt: Prompt:
type: object type: object
properties: properties:

File diff suppressed because it is too large Load diff

View file

@ -974,11 +974,11 @@ paths:
get: get:
responses: responses:
'200': '200':
description: A ListModelsResponse. description: A OpenAIListModelsResponse.
content: content:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/ListModelsResponse' $ref: '#/components/schemas/OpenAIListModelsResponse'
'400': '400':
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
'429': '429':
@ -991,8 +991,8 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Models - Models
summary: List all models. summary: List models using the OpenAI API.
description: List all models. description: List models using the OpenAI API.
parameters: [] parameters: []
deprecated: false deprecated: false
post: post:
@ -1126,31 +1126,6 @@ paths:
$ref: '#/components/schemas/RunModerationRequest' $ref: '#/components/schemas/RunModerationRequest'
required: true required: true
deprecated: false deprecated: false
/v1/openai/v1/models:
get:
responses:
'200':
description: A OpenAIListModelsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIListModelsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: List models using the OpenAI API.
description: List models using the OpenAI API.
parameters: []
deprecated: false
/v1/prompts: /v1/prompts:
get: get:
responses: responses:
@ -5610,6 +5585,88 @@ components:
title: ListRoutesResponse title: ListRoutesResponse
description: >- description: >-
Response containing a list of all available API routes. Response containing a list of all available API routes.
OpenAIModel:
type: object
properties:
id:
type: string
object:
type: string
const: model
default: model
created:
type: integer
owned_by:
type: string
custom_metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
- object
- created
- owned_by
title: OpenAIModel
description: A model from OpenAI.
OpenAIListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIModel'
additionalProperties: false
required:
- data
title: OpenAIListModelsResponse
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
Model: Model:
type: object type: object
properties: properties:
@ -5667,57 +5724,6 @@ components:
title: Model title: Model
description: >- description: >-
A model resource representing an AI model registered in Llama Stack. A model resource representing an AI model registered in Llama Stack.
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
ListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Model'
additionalProperties: false
required:
- data
title: ListModelsResponse
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
RunModerationRequest: RunModerationRequest:
type: object type: object
properties: properties:
@ -5807,48 +5813,6 @@ components:
- metadata - metadata
title: ModerationObjectResults title: ModerationObjectResults
description: A moderation object. description: A moderation object.
OpenAIModel:
type: object
properties:
id:
type: string
object:
type: string
const: model
default: model
created:
type: integer
owned_by:
type: string
custom_metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
- object
- created
- owned_by
title: OpenAIModel
description: A model from OpenAI.
OpenAIListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIModel'
additionalProperties: false
required:
- data
title: OpenAIListModelsResponse
Prompt: Prompt:
type: object type: object
properties: properties:

View file

@ -977,11 +977,11 @@ paths:
get: get:
responses: responses:
'200': '200':
description: A ListModelsResponse. description: A OpenAIListModelsResponse.
content: content:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/ListModelsResponse' $ref: '#/components/schemas/OpenAIListModelsResponse'
'400': '400':
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
'429': '429':
@ -994,8 +994,8 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Models - Models
summary: List all models. summary: List models using the OpenAI API.
description: List all models. description: List models using the OpenAI API.
parameters: [] parameters: []
deprecated: false deprecated: false
post: post:
@ -1129,31 +1129,6 @@ paths:
$ref: '#/components/schemas/RunModerationRequest' $ref: '#/components/schemas/RunModerationRequest'
required: true required: true
deprecated: false deprecated: false
/v1/openai/v1/models:
get:
responses:
'200':
description: A OpenAIListModelsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIListModelsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: List models using the OpenAI API.
description: List models using the OpenAI API.
parameters: []
deprecated: false
/v1/prompts: /v1/prompts:
get: get:
responses: responses:
@ -6823,6 +6798,88 @@ components:
title: ListRoutesResponse title: ListRoutesResponse
description: >- description: >-
Response containing a list of all available API routes. Response containing a list of all available API routes.
OpenAIModel:
type: object
properties:
id:
type: string
object:
type: string
const: model
default: model
created:
type: integer
owned_by:
type: string
custom_metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
- object
- created
- owned_by
title: OpenAIModel
description: A model from OpenAI.
OpenAIListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIModel'
additionalProperties: false
required:
- data
title: OpenAIListModelsResponse
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
Model: Model:
type: object type: object
properties: properties:
@ -6880,57 +6937,6 @@ components:
title: Model title: Model
description: >- description: >-
A model resource representing an AI model registered in Llama Stack. A model resource representing an AI model registered in Llama Stack.
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
ListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Model'
additionalProperties: false
required:
- data
title: ListModelsResponse
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
RunModerationRequest: RunModerationRequest:
type: object type: object
properties: properties:
@ -7020,48 +7026,6 @@ components:
- metadata - metadata
title: ModerationObjectResults title: ModerationObjectResults
description: A moderation object. description: A moderation object.
OpenAIModel:
type: object
properties:
id:
type: string
object:
type: string
const: model
default: model
created:
type: integer
owned_by:
type: string
custom_metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
- object
- created
- owned_by
title: OpenAIModel
description: A model from OpenAI.
OpenAIListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIModel'
additionalProperties: false
required:
- data
title: OpenAIListModelsResponse
Prompt: Prompt:
type: object type: object
properties: properties:

View file

@ -491,13 +491,6 @@ class Agents(Protocol):
APIs for creating and interacting with agentic systems.""" APIs for creating and interacting with agentic systems."""
@webmethod(
route="/agents",
method="POST",
descriptive_name="create_agent",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod( @webmethod(
route="/agents", route="/agents",
method="POST", method="POST",
@ -515,13 +508,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn",
method="POST",
descriptive_name="create_agent_turn",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}/turn", route="/agents/{agent_id}/session/{session_id}/turn",
method="POST", method="POST",
@ -552,13 +538,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
method="POST",
descriptive_name="resume_agent_turn",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume", route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
method="POST", method="POST",
@ -586,12 +565,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
method="GET",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}", route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
method="GET", method="GET",
@ -612,12 +585,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
method="GET",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}", route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
method="GET", method="GET",
@ -640,13 +607,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/session",
method="POST",
descriptive_name="create_agent_session",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod( @webmethod(
route="/agents/{agent_id}/session", route="/agents/{agent_id}/session",
method="POST", method="POST",
@ -666,12 +626,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/session/{session_id}",
method="GET",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}", route="/agents/{agent_id}/session/{session_id}",
method="GET", method="GET",
@ -692,12 +646,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/session/{session_id}",
method="DELETE",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}", route="/agents/{agent_id}/session/{session_id}",
method="DELETE", method="DELETE",
@ -715,12 +663,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}",
method="DELETE",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
async def delete_agent( async def delete_agent(
self, self,
@ -732,7 +674,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/agents", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse: async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
"""List all agents. """List all agents.
@ -743,12 +684,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}",
method="GET",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_agent(self, agent_id: str) -> Agent: async def get_agent(self, agent_id: str) -> Agent:
"""Describe an agent by its ID. """Describe an agent by its ID.
@ -758,12 +693,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/sessions",
method="GET",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def list_agent_sessions( async def list_agent_sessions(
self, self,
@ -787,12 +716,6 @@ class Agents(Protocol):
# #
# Both of these APIs are inherently stateful. # Both of these APIs are inherently stateful.
@webmethod(
route="/openai/v1/responses/{response_id}",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_openai_response( async def get_openai_response(
self, self,
@ -805,7 +728,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
async def create_openai_response( async def create_openai_response(
self, self,
@ -842,7 +764,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_responses( async def list_openai_responses(
self, self,
@ -861,9 +782,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_response_input_items( async def list_openai_response_input_items(
self, self,
@ -886,7 +804,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1) @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject: async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
"""Delete a response. """Delete a response.

View file

@ -43,7 +43,6 @@ class Batches(Protocol):
Note: This API is currently under active development and may undergo changes. Note: This API is currently under active development and may undergo changes.
""" """
@webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1)
async def create_batch( async def create_batch(
self, self,
@ -64,7 +63,6 @@ class Batches(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
async def retrieve_batch(self, batch_id: str) -> BatchObject: async def retrieve_batch(self, batch_id: str) -> BatchObject:
"""Retrieve information about a specific batch. """Retrieve information about a specific batch.
@ -74,7 +72,6 @@ class Batches(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
async def cancel_batch(self, batch_id: str) -> BatchObject: async def cancel_batch(self, batch_id: str) -> BatchObject:
"""Cancel a batch that is in progress. """Cancel a batch that is in progress.
@ -84,7 +81,6 @@ class Batches(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1)
async def list_batches( async def list_batches(
self, self,

View file

@ -8,7 +8,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from llama_stack.apis.resource import Resource, ResourceType from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
@ -54,7 +54,6 @@ class ListBenchmarksResponse(BaseModel):
@runtime_checkable @runtime_checkable
class Benchmarks(Protocol): class Benchmarks(Protocol):
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def list_benchmarks(self) -> ListBenchmarksResponse: async def list_benchmarks(self) -> ListBenchmarksResponse:
"""List all benchmarks. """List all benchmarks.
@ -63,7 +62,6 @@ class Benchmarks(Protocol):
""" """
... ...
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_benchmark( async def get_benchmark(
self, self,
@ -76,7 +74,6 @@ class Benchmarks(Protocol):
""" """
... ...
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def register_benchmark( async def register_benchmark(
self, self,
@ -98,7 +95,6 @@ class Benchmarks(Protocol):
""" """
... ...
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
async def unregister_benchmark(self, benchmark_id: str) -> None: async def unregister_benchmark(self, benchmark_id: str) -> None:
"""Unregister a benchmark. """Unregister a benchmark.

View file

@ -8,7 +8,7 @@ from typing import Any, Protocol, runtime_checkable
from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.datasets import Dataset from llama_stack.apis.datasets import Dataset
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
from llama_stack.schema_utils import webmethod from llama_stack.schema_utils import webmethod
@ -21,7 +21,6 @@ class DatasetIO(Protocol):
# keeping for aligning with inference/safety, but this is not used # keeping for aligning with inference/safety, but this is not used
dataset_store: DatasetStore dataset_store: DatasetStore
@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA) @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
async def iterrows( async def iterrows(
self, self,
@ -46,9 +45,6 @@ class DatasetIO(Protocol):
""" """
... ...
@webmethod(
route="/datasetio/append-rows/{dataset_id:path}", method="POST", deprecated=True, level=LLAMA_STACK_API_V1
)
@webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1BETA) @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1BETA)
async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None: async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
"""Append rows to a dataset. """Append rows to a dataset.

View file

@ -10,7 +10,7 @@ from typing import Annotated, Any, Literal, Protocol
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from llama_stack.apis.resource import Resource, ResourceType from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -146,7 +146,6 @@ class ListDatasetsResponse(BaseModel):
class Datasets(Protocol): class Datasets(Protocol):
@webmethod(route="/datasets", method="POST", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA) @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA)
async def register_dataset( async def register_dataset(
self, self,
@ -216,7 +215,6 @@ class Datasets(Protocol):
""" """
... ...
@webmethod(route="/datasets/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA) @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
async def get_dataset( async def get_dataset(
self, self,
@ -229,7 +227,6 @@ class Datasets(Protocol):
""" """
... ...
@webmethod(route="/datasets", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1BETA) @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1BETA)
async def list_datasets(self) -> ListDatasetsResponse: async def list_datasets(self) -> ListDatasetsResponse:
"""List all datasets. """List all datasets.
@ -238,7 +235,6 @@ class Datasets(Protocol):
""" """
... ...
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA) @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA)
async def unregister_dataset( async def unregister_dataset(
self, self,

View file

@ -13,7 +13,7 @@ from llama_stack.apis.common.job_types import Job
from llama_stack.apis.inference import SamplingParams, SystemMessage from llama_stack.apis.inference import SamplingParams, SystemMessage
from llama_stack.apis.scoring import ScoringResult from llama_stack.apis.scoring import ScoringResult
from llama_stack.apis.scoring_functions import ScoringFnParams from llama_stack.apis.scoring_functions import ScoringFnParams
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -86,7 +86,6 @@ class Eval(Protocol):
Llama Stack Evaluation API for running evaluations on model and agent candidates.""" Llama Stack Evaluation API for running evaluations on model and agent candidates."""
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def run_eval( async def run_eval(
self, self,
@ -101,9 +100,6 @@ class Eval(Protocol):
""" """
... ...
@webmethod(
route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def evaluate_rows( async def evaluate_rows(
self, self,
@ -122,9 +118,6 @@ class Eval(Protocol):
""" """
... ...
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def job_status(self, benchmark_id: str, job_id: str) -> Job: async def job_status(self, benchmark_id: str, job_id: str) -> Job:
"""Get the status of a job. """Get the status of a job.
@ -135,12 +128,6 @@ class Eval(Protocol):
""" """
... ...
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
method="DELETE",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
async def job_cancel(self, benchmark_id: str, job_id: str) -> None: async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
"""Cancel a job. """Cancel a job.
@ -150,12 +137,6 @@ class Eval(Protocol):
""" """
... ...
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod( @webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
) )

View file

@ -110,7 +110,6 @@ class Files(Protocol):
""" """
# OpenAI Files API Endpoints # OpenAI Files API Endpoints
@webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
async def openai_upload_file( async def openai_upload_file(
self, self,
@ -134,7 +133,6 @@ class Files(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_files( async def openai_list_files(
self, self,
@ -155,7 +153,6 @@ class Files(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_file( async def openai_retrieve_file(
self, self,
@ -170,7 +167,6 @@ class Files(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1) @webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def openai_delete_file( async def openai_delete_file(
self, self,
@ -183,7 +179,6 @@ class Files(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_file_content( async def openai_retrieve_file_content(
self, self,

View file

@ -1189,7 +1189,6 @@ class InferenceProvider(Protocol):
raise NotImplementedError("Reranking is not implemented") raise NotImplementedError("Reranking is not implemented")
return # this is so mypy's safe-super rule will consider the method concrete return # this is so mypy's safe-super rule will consider the method concrete
@webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
async def openai_completion( async def openai_completion(
self, self,
@ -1202,7 +1201,6 @@ class InferenceProvider(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
async def openai_chat_completion( async def openai_chat_completion(
self, self,
@ -1215,7 +1213,6 @@ class InferenceProvider(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
async def openai_embeddings( async def openai_embeddings(
self, self,
@ -1240,7 +1237,6 @@ class Inference(InferenceProvider):
- Rerank models: these models reorder the documents based on their relevance to a query. - Rerank models: these models reorder the documents based on their relevance to a query.
""" """
@webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
async def list_chat_completions( async def list_chat_completions(
self, self,
@ -1259,9 +1255,6 @@ class Inference(InferenceProvider):
""" """
raise NotImplementedError("List chat completions is not implemented") raise NotImplementedError("List chat completions is not implemented")
@webmethod(
route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages: async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
"""Get chat completion. """Get chat completion.

View file

@ -107,7 +107,6 @@ class OpenAIListModelsResponse(BaseModel):
@runtime_checkable @runtime_checkable
@trace_protocol @trace_protocol
class Models(Protocol): class Models(Protocol):
@webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
async def list_models(self) -> ListModelsResponse: async def list_models(self) -> ListModelsResponse:
"""List all models. """List all models.
@ -115,7 +114,7 @@ class Models(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_models(self) -> OpenAIListModelsResponse: async def openai_list_models(self) -> OpenAIListModelsResponse:
"""List models using the OpenAI API. """List models using the OpenAI API.

View file

@ -13,7 +13,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.common.content_types import URL from llama_stack.apis.common.content_types import URL
from llama_stack.apis.common.job_types import JobStatus from llama_stack.apis.common.job_types import JobStatus
from llama_stack.apis.common.training_types import Checkpoint from llama_stack.apis.common.training_types import Checkpoint
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -284,7 +284,6 @@ class PostTrainingJobArtifactsResponse(BaseModel):
class PostTraining(Protocol): class PostTraining(Protocol):
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def supervised_fine_tune( async def supervised_fine_tune(
self, self,
@ -312,7 +311,6 @@ class PostTraining(Protocol):
""" """
... ...
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def preference_optimize( async def preference_optimize(
self, self,
@ -335,7 +333,6 @@ class PostTraining(Protocol):
""" """
... ...
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_jobs(self) -> ListPostTrainingJobsResponse: async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
"""Get all training jobs. """Get all training jobs.
@ -344,7 +341,6 @@ class PostTraining(Protocol):
""" """
... ...
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse: async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse:
"""Get the status of a training job. """Get the status of a training job.
@ -354,7 +350,6 @@ class PostTraining(Protocol):
""" """
... ...
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def cancel_training_job(self, job_uuid: str) -> None: async def cancel_training_job(self, job_uuid: str) -> None:
"""Cancel a training job. """Cancel a training job.
@ -363,7 +358,6 @@ class PostTraining(Protocol):
""" """
... ...
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse: async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
"""Get the artifacts of a training job. """Get the artifacts of a training job.

View file

@ -121,7 +121,6 @@ class Safety(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject: async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
"""Create moderation. """Create moderation.

View file

@ -545,7 +545,6 @@ class VectorIO(Protocol):
... ...
# OpenAI Vector Stores API endpoints # OpenAI Vector Stores API endpoints
@webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
async def openai_create_vector_store( async def openai_create_vector_store(
self, self,
@ -558,7 +557,6 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_vector_stores( async def openai_list_vector_stores(
self, self,
@ -577,9 +575,6 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_vector_store( async def openai_retrieve_vector_store(
self, self,
@ -592,9 +587,6 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod( @webmethod(
route="/vector_stores/{vector_store_id}", route="/vector_stores/{vector_store_id}",
method="POST", method="POST",
@ -617,9 +609,6 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod( @webmethod(
route="/vector_stores/{vector_store_id}", route="/vector_stores/{vector_store_id}",
method="DELETE", method="DELETE",
@ -636,12 +625,6 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/search",
method="POST",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod( @webmethod(
route="/vector_stores/{vector_store_id}/search", route="/vector_stores/{vector_store_id}/search",
method="POST", method="POST",
@ -674,12 +657,6 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files",
method="POST",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod( @webmethod(
route="/vector_stores/{vector_store_id}/files", route="/vector_stores/{vector_store_id}/files",
method="POST", method="POST",
@ -702,12 +679,6 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod( @webmethod(
route="/vector_stores/{vector_store_id}/files", route="/vector_stores/{vector_store_id}/files",
method="GET", method="GET",
@ -734,12 +705,6 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod( @webmethod(
route="/vector_stores/{vector_store_id}/files/{file_id}", route="/vector_stores/{vector_store_id}/files/{file_id}",
method="GET", method="GET",
@ -758,12 +723,6 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod( @webmethod(
route="/vector_stores/{vector_store_id}/files/{file_id}/content", route="/vector_stores/{vector_store_id}/files/{file_id}/content",
method="GET", method="GET",
@ -782,12 +741,6 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
method="POST",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod( @webmethod(
route="/vector_stores/{vector_store_id}/files/{file_id}", route="/vector_stores/{vector_store_id}/files/{file_id}",
method="POST", method="POST",
@ -808,12 +761,6 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
method="DELETE",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod( @webmethod(
route="/vector_stores/{vector_store_id}/files/{file_id}", route="/vector_stores/{vector_store_id}/files/{file_id}",
method="DELETE", method="DELETE",
@ -837,12 +784,6 @@ class VectorIO(Protocol):
method="POST", method="POST",
level=LLAMA_STACK_API_V1, level=LLAMA_STACK_API_V1,
) )
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/file_batches",
method="POST",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
async def openai_create_vector_store_file_batch( async def openai_create_vector_store_file_batch(
self, self,
vector_store_id: str, vector_store_id: str,
@ -861,12 +802,6 @@ class VectorIO(Protocol):
method="GET", method="GET",
level=LLAMA_STACK_API_V1, level=LLAMA_STACK_API_V1,
) )
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
async def openai_retrieve_vector_store_file_batch( async def openai_retrieve_vector_store_file_batch(
self, self,
batch_id: str, batch_id: str,
@ -880,12 +815,6 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod( @webmethod(
route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files", route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
method="GET", method="GET",
@ -914,12 +843,6 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
method="POST",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod( @webmethod(
route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel", route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
method="POST", method="POST",

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import os import os
import unittest
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
import pytest import pytest
@ -13,6 +12,8 @@ import pytest
from llama_stack.apis.benchmarks import Benchmark from llama_stack.apis.benchmarks import Benchmark
from llama_stack.apis.common.job_types import Job, JobStatus from llama_stack.apis.common.job_types import Job, JobStatus
from llama_stack.apis.eval.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams from llama_stack.apis.eval.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams
from llama_stack.apis.inference.inference import TopPSamplingStrategy
from llama_stack.apis.resource import ResourceType
from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl
@ -21,54 +22,51 @@ MOCK_DATASET_ID = "default/test-dataset"
MOCK_BENCHMARK_ID = "test-benchmark" MOCK_BENCHMARK_ID = "test-benchmark"
class TestNVIDIAEvalImpl(unittest.TestCase): @pytest.fixture
def setUp(self): def nvidia_eval_setup():
"""Set up the NVIDIA eval implementation with mocked dependencies."""
os.environ["NVIDIA_EVALUATOR_URL"] = "http://nemo.test" os.environ["NVIDIA_EVALUATOR_URL"] = "http://nemo.test"
# Create mock APIs # Create mock APIs
self.datasetio_api = MagicMock() datasetio_api = MagicMock()
self.datasets_api = MagicMock() datasets_api = MagicMock()
self.scoring_api = MagicMock() scoring_api = MagicMock()
self.inference_api = MagicMock() inference_api = MagicMock()
self.agents_api = MagicMock() agents_api = MagicMock()
self.config = NVIDIAEvalConfig( config = NVIDIAEvalConfig(
evaluator_url=os.environ["NVIDIA_EVALUATOR_URL"], evaluator_url=os.environ["NVIDIA_EVALUATOR_URL"],
) )
self.eval_impl = NVIDIAEvalImpl( eval_impl = NVIDIAEvalImpl(
config=self.config, config=config,
datasetio_api=self.datasetio_api, datasetio_api=datasetio_api,
datasets_api=self.datasets_api, datasets_api=datasets_api,
scoring_api=self.scoring_api, scoring_api=scoring_api,
inference_api=self.inference_api, inference_api=inference_api,
agents_api=self.agents_api, agents_api=agents_api,
) )
# Mock the HTTP request methods # Mock the HTTP request methods
self.evaluator_get_patcher = patch( with (
"llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_get" patch("llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_get") as mock_evaluator_get,
) patch("llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_post") as mock_evaluator_post,
self.evaluator_post_patcher = patch( ):
"llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_post" yield {
) "eval_impl": eval_impl,
self.evaluator_delete_patcher = patch( "mock_evaluator_get": mock_evaluator_get,
"llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_delete" "mock_evaluator_post": mock_evaluator_post,
) "datasetio_api": datasetio_api,
"datasets_api": datasets_api,
"scoring_api": scoring_api,
"inference_api": inference_api,
"agents_api": agents_api,
}
self.mock_evaluator_get = self.evaluator_get_patcher.start()
self.mock_evaluator_post = self.evaluator_post_patcher.start()
self.mock_evaluator_delete = self.evaluator_delete_patcher.start()
def tearDown(self): def _assert_request_body(mock_evaluator_post, expected_json):
"""Clean up after each test."""
self.evaluator_get_patcher.stop()
self.evaluator_post_patcher.stop()
self.evaluator_delete_patcher.stop()
def _assert_request_body(self, expected_json):
"""Helper method to verify request body in Evaluator POST request is correct""" """Helper method to verify request body in Evaluator POST request is correct"""
call_args = self.mock_evaluator_post.call_args call_args = mock_evaluator_post.call_args
actual_json = call_args[0][1] actual_json = call_args[0][1]
# Check that all expected keys contain the expected values in the actual JSON # Check that all expected keys contain the expected values in the actual JSON
@ -82,11 +80,11 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
else: else:
assert actual_json[key] == value, f"Value mismatch for '{key}'" assert actual_json[key] == value, f"Value mismatch for '{key}'"
@pytest.fixture(autouse=True)
def inject_fixtures(self, run_async):
self.run_async = run_async
def test_register_benchmark(self): async def test_register_benchmark(nvidia_eval_setup):
eval_impl = nvidia_eval_setup["eval_impl"]
mock_evaluator_post = nvidia_eval_setup["mock_evaluator_post"]
eval_config = { eval_config = {
"type": "custom", "type": "custom",
"params": {"parallelism": 8}, "params": {"parallelism": 8},
@ -102,7 +100,7 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
benchmark = Benchmark( benchmark = Benchmark(
provider_id="nvidia", provider_id="nvidia",
type="benchmark", type=ResourceType.benchmark,
identifier=MOCK_BENCHMARK_ID, identifier=MOCK_BENCHMARK_ID,
dataset_id=MOCK_DATASET_ID, dataset_id=MOCK_DATASET_ID,
scoring_functions=["basic::equality"], scoring_functions=["basic::equality"],
@ -111,47 +109,45 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
# Mock Evaluator API response # Mock Evaluator API response
mock_evaluator_response = {"id": MOCK_BENCHMARK_ID, "status": "created"} mock_evaluator_response = {"id": MOCK_BENCHMARK_ID, "status": "created"}
self.mock_evaluator_post.return_value = mock_evaluator_response mock_evaluator_post.return_value = mock_evaluator_response
# Register the benchmark # Register the benchmark
self.run_async(self.eval_impl.register_benchmark(benchmark)) await eval_impl.register_benchmark(benchmark)
# Verify the Evaluator API was called correctly # Verify the Evaluator API was called correctly
self.mock_evaluator_post.assert_called_once() mock_evaluator_post.assert_called_once()
self._assert_request_body({"namespace": benchmark.provider_id, "name": benchmark.identifier, **eval_config}) _assert_request_body(
mock_evaluator_post, {"namespace": benchmark.provider_id, "name": benchmark.identifier, **eval_config}
)
def test_unregister_benchmark(self):
# Unregister the benchmark
self.run_async(self.eval_impl.unregister_benchmark(benchmark_id=MOCK_BENCHMARK_ID))
# Verify the Evaluator API was called correctly async def test_run_eval(nvidia_eval_setup):
self.mock_evaluator_delete.assert_called_once_with(f"/v1/evaluation/configs/nvidia/{MOCK_BENCHMARK_ID}") eval_impl = nvidia_eval_setup["eval_impl"]
mock_evaluator_post = nvidia_eval_setup["mock_evaluator_post"]
def test_run_eval(self):
benchmark_config = BenchmarkConfig( benchmark_config = BenchmarkConfig(
eval_candidate=ModelCandidate( eval_candidate=ModelCandidate(
type="model", type="model",
model=CoreModelId.llama3_1_8b_instruct.value, model=CoreModelId.llama3_1_8b_instruct.value,
sampling_params=SamplingParams(max_tokens=100, temperature=0.7), sampling_params=SamplingParams(max_tokens=100, strategy=TopPSamplingStrategy(temperature=0.7)),
) )
) )
# Mock Evaluator API response # Mock Evaluator API response
mock_evaluator_response = {"id": "job-123", "status": "created"} mock_evaluator_response = {"id": "job-123", "status": "created"}
self.mock_evaluator_post.return_value = mock_evaluator_response mock_evaluator_post.return_value = mock_evaluator_response
# Run the Evaluation job # Run the Evaluation job
result = self.run_async( result = await eval_impl.run_eval(benchmark_id=MOCK_BENCHMARK_ID, benchmark_config=benchmark_config)
self.eval_impl.run_eval(benchmark_id=MOCK_BENCHMARK_ID, benchmark_config=benchmark_config)
)
# Verify the Evaluator API was called correctly # Verify the Evaluator API was called correctly
self.mock_evaluator_post.assert_called_once() mock_evaluator_post.assert_called_once()
self._assert_request_body( _assert_request_body(
mock_evaluator_post,
{ {
"config": f"nvidia/{MOCK_BENCHMARK_ID}", "config": f"nvidia/{MOCK_BENCHMARK_ID}",
"target": {"type": "model", "model": "Llama3.1-8B-Instruct"}, "target": {"type": "model", "model": "Llama3.1-8B-Instruct"},
} },
) )
# Verify the result # Verify the result
@ -159,13 +155,17 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
assert result.job_id == "job-123" assert result.job_id == "job-123"
assert result.status == JobStatus.in_progress assert result.status == JobStatus.in_progress
def test_job_status(self):
async def test_job_status(nvidia_eval_setup):
eval_impl = nvidia_eval_setup["eval_impl"]
mock_evaluator_get = nvidia_eval_setup["mock_evaluator_get"]
# Mock Evaluator API response # Mock Evaluator API response
mock_evaluator_response = {"id": "job-123", "status": "completed"} mock_evaluator_response = {"id": "job-123", "status": "completed"}
self.mock_evaluator_get.return_value = mock_evaluator_response mock_evaluator_get.return_value = mock_evaluator_response
# Get the Evaluation job # Get the Evaluation job
result = self.run_async(self.eval_impl.job_status(benchmark_id=MOCK_BENCHMARK_ID, job_id="job-123")) result = await eval_impl.job_status(benchmark_id=MOCK_BENCHMARK_ID, job_id="job-123")
# Verify the result # Verify the result
assert isinstance(result, Job) assert isinstance(result, Job)
@ -173,20 +173,28 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
assert result.status == JobStatus.completed assert result.status == JobStatus.completed
# Verify the API was called correctly # Verify the API was called correctly
self.mock_evaluator_get.assert_called_once_with(f"/v1/evaluation/jobs/{result.job_id}") mock_evaluator_get.assert_called_once_with(f"/v1/evaluation/jobs/{result.job_id}")
async def test_job_cancel(nvidia_eval_setup):
eval_impl = nvidia_eval_setup["eval_impl"]
mock_evaluator_post = nvidia_eval_setup["mock_evaluator_post"]
def test_job_cancel(self):
# Mock Evaluator API response # Mock Evaluator API response
mock_evaluator_response = {"id": "job-123", "status": "cancelled"} mock_evaluator_response = {"id": "job-123", "status": "cancelled"}
self.mock_evaluator_post.return_value = mock_evaluator_response mock_evaluator_post.return_value = mock_evaluator_response
# Cancel the Evaluation job # Cancel the Evaluation job
self.run_async(self.eval_impl.job_cancel(benchmark_id=MOCK_BENCHMARK_ID, job_id="job-123")) await eval_impl.job_cancel(benchmark_id=MOCK_BENCHMARK_ID, job_id="job-123")
# Verify the API was called correctly # Verify the API was called correctly
self.mock_evaluator_post.assert_called_once_with("/v1/evaluation/jobs/job-123/cancel", {}) mock_evaluator_post.assert_called_once_with("/v1/evaluation/jobs/job-123/cancel", {})
async def test_job_result(nvidia_eval_setup):
eval_impl = nvidia_eval_setup["eval_impl"]
mock_evaluator_get = nvidia_eval_setup["mock_evaluator_get"]
def test_job_result(self):
# Mock Evaluator API responses # Mock Evaluator API responses
mock_job_status_response = {"id": "job-123", "status": "completed"} mock_job_status_response = {"id": "job-123", "status": "completed"}
mock_job_results_response = { mock_job_results_response = {
@ -194,13 +202,13 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
"status": "completed", "status": "completed",
"results": {MOCK_BENCHMARK_ID: {"score": 0.85, "details": {"accuracy": 0.85, "f1": 0.84}}}, "results": {MOCK_BENCHMARK_ID: {"score": 0.85, "details": {"accuracy": 0.85, "f1": 0.84}}},
} }
self.mock_evaluator_get.side_effect = [ mock_evaluator_get.side_effect = [
mock_job_status_response, # First call to retrieve job mock_job_status_response, # First call to retrieve job
mock_job_results_response, # Second call to retrieve job results mock_job_results_response, # Second call to retrieve job results
] ]
# Get the Evaluation job results # Get the Evaluation job results
result = self.run_async(self.eval_impl.job_result(benchmark_id=MOCK_BENCHMARK_ID, job_id="job-123")) result = await eval_impl.job_result(benchmark_id=MOCK_BENCHMARK_ID, job_id="job-123")
# Verify the result # Verify the result
assert isinstance(result, EvaluateResponse) assert isinstance(result, EvaluateResponse)
@ -208,6 +216,6 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
assert result.scores[MOCK_BENCHMARK_ID].aggregated_results["results"][MOCK_BENCHMARK_ID]["score"] == 0.85 assert result.scores[MOCK_BENCHMARK_ID].aggregated_results["results"][MOCK_BENCHMARK_ID]["score"] == 0.85
# Verify the API was called correctly # Verify the API was called correctly
assert self.mock_evaluator_get.call_count == 2 assert mock_evaluator_get.call_count == 2
self.mock_evaluator_get.assert_any_call("/v1/evaluation/jobs/job-123") mock_evaluator_get.assert_any_call("/v1/evaluation/jobs/job-123")
self.mock_evaluator_get.assert_any_call("/v1/evaluation/jobs/job-123/results") mock_evaluator_get.assert_any_call("/v1/evaluation/jobs/job-123/results")