mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00
Merge branch 'main' into add-mcp-authentication-param
This commit is contained in:
commit
114ab693a5
40 changed files with 2827 additions and 1700 deletions
|
|
@ -998,39 +998,6 @@ paths:
|
||||||
description: List models using the OpenAI API.
|
description: List models using the OpenAI API.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
post:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: A Model.
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/Model'
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Models
|
|
||||||
summary: Register model.
|
|
||||||
description: >-
|
|
||||||
Register model.
|
|
||||||
|
|
||||||
Register a model.
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/RegisterModelRequest'
|
|
||||||
required: true
|
|
||||||
deprecated: false
|
|
||||||
/v1/models/{model_id}:
|
/v1/models/{model_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1065,36 +1032,6 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: false
|
||||||
delete:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Models
|
|
||||||
summary: Unregister model.
|
|
||||||
description: >-
|
|
||||||
Unregister model.
|
|
||||||
|
|
||||||
Unregister a model.
|
|
||||||
parameters:
|
|
||||||
- name: model_id
|
|
||||||
in: path
|
|
||||||
description: >-
|
|
||||||
The identifier of the model to unregister.
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
deprecated: false
|
|
||||||
/v1/moderations:
|
/v1/moderations:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1725,32 +1662,6 @@ paths:
|
||||||
description: List all scoring functions.
|
description: List all scoring functions.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
post:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- ScoringFunctions
|
|
||||||
summary: Register a scoring function.
|
|
||||||
description: Register a scoring function.
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/RegisterScoringFunctionRequest'
|
|
||||||
required: true
|
|
||||||
deprecated: false
|
|
||||||
/v1/scoring-functions/{scoring_fn_id}:
|
/v1/scoring-functions/{scoring_fn_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1782,33 +1693,6 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: false
|
||||||
delete:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- ScoringFunctions
|
|
||||||
summary: Unregister a scoring function.
|
|
||||||
description: Unregister a scoring function.
|
|
||||||
parameters:
|
|
||||||
- name: scoring_fn_id
|
|
||||||
in: path
|
|
||||||
description: >-
|
|
||||||
The ID of the scoring function to unregister.
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
deprecated: false
|
|
||||||
/v1/scoring/score:
|
/v1/scoring/score:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1897,36 +1781,6 @@ paths:
|
||||||
description: List all shields.
|
description: List all shields.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
post:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: A Shield.
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/Shield'
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Shields
|
|
||||||
summary: Register a shield.
|
|
||||||
description: Register a shield.
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/RegisterShieldRequest'
|
|
||||||
required: true
|
|
||||||
deprecated: false
|
|
||||||
/v1/shields/{identifier}:
|
/v1/shields/{identifier}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1958,33 +1812,6 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: false
|
||||||
delete:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Shields
|
|
||||||
summary: Unregister a shield.
|
|
||||||
description: Unregister a shield.
|
|
||||||
parameters:
|
|
||||||
- name: identifier
|
|
||||||
in: path
|
|
||||||
description: >-
|
|
||||||
The identifier of the shield to unregister.
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
deprecated: false
|
|
||||||
/v1/tool-runtime/invoke:
|
/v1/tool-runtime/invoke:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -2080,32 +1907,6 @@ paths:
|
||||||
description: List tool groups with optional provider.
|
description: List tool groups with optional provider.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
post:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- ToolGroups
|
|
||||||
summary: Register a tool group.
|
|
||||||
description: Register a tool group.
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/RegisterToolGroupRequest'
|
|
||||||
required: true
|
|
||||||
deprecated: false
|
|
||||||
/v1/toolgroups/{toolgroup_id}:
|
/v1/toolgroups/{toolgroup_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -2137,32 +1938,6 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: false
|
||||||
delete:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- ToolGroups
|
|
||||||
summary: Unregister a tool group.
|
|
||||||
description: Unregister a tool group.
|
|
||||||
parameters:
|
|
||||||
- name: toolgroup_id
|
|
||||||
in: path
|
|
||||||
description: The ID of the tool group to unregister.
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
deprecated: false
|
|
||||||
/v1/tools:
|
/v1/tools:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -2916,11 +2691,11 @@ paths:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: >-
|
description: >-
|
||||||
A list of InterleavedContent representing the file contents.
|
A VectorStoreFileContentResponse representing the file contents.
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/VectorStoreFileContentsResponse'
|
$ref: '#/components/schemas/VectorStoreFileContentResponse'
|
||||||
'400':
|
'400':
|
||||||
$ref: '#/components/responses/BadRequest400'
|
$ref: '#/components/responses/BadRequest400'
|
||||||
'429':
|
'429':
|
||||||
|
|
@ -3171,7 +2946,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/RegisterDatasetRequest'
|
$ref: '#/components/schemas/RegisterDatasetRequest'
|
||||||
required: true
|
required: true
|
||||||
deprecated: false
|
deprecated: true
|
||||||
/v1beta/datasets/{dataset_id}:
|
/v1beta/datasets/{dataset_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -3228,7 +3003,7 @@ paths:
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: true
|
||||||
/v1alpha/eval/benchmarks:
|
/v1alpha/eval/benchmarks:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -3279,7 +3054,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/RegisterBenchmarkRequest'
|
$ref: '#/components/schemas/RegisterBenchmarkRequest'
|
||||||
required: true
|
required: true
|
||||||
deprecated: false
|
deprecated: true
|
||||||
/v1alpha/eval/benchmarks/{benchmark_id}:
|
/v1alpha/eval/benchmarks/{benchmark_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -3336,7 +3111,7 @@ paths:
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: true
|
||||||
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
|
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -6280,46 +6055,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- data
|
- data
|
||||||
title: OpenAIListModelsResponse
|
title: OpenAIListModelsResponse
|
||||||
ModelType:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- llm
|
|
||||||
- embedding
|
|
||||||
- rerank
|
|
||||||
title: ModelType
|
|
||||||
description: >-
|
|
||||||
Enumeration of supported model types in Llama Stack.
|
|
||||||
RegisterModelRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
model_id:
|
|
||||||
type: string
|
|
||||||
description: The identifier of the model to register.
|
|
||||||
provider_model_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The identifier of the model in the provider.
|
|
||||||
provider_id:
|
|
||||||
type: string
|
|
||||||
description: The identifier of the provider.
|
|
||||||
metadata:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: Any additional metadata for this model.
|
|
||||||
model_type:
|
|
||||||
$ref: '#/components/schemas/ModelType'
|
|
||||||
description: The type of model to register.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- model_id
|
|
||||||
title: RegisterModelRequest
|
|
||||||
Model:
|
Model:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -6377,6 +6112,15 @@ components:
|
||||||
title: Model
|
title: Model
|
||||||
description: >-
|
description: >-
|
||||||
A model resource representing an AI model registered in Llama Stack.
|
A model resource representing an AI model registered in Llama Stack.
|
||||||
|
ModelType:
|
||||||
|
type: string
|
||||||
|
enum:
|
||||||
|
- llm
|
||||||
|
- embedding
|
||||||
|
- rerank
|
||||||
|
title: ModelType
|
||||||
|
description: >-
|
||||||
|
Enumeration of supported model types in Llama Stack.
|
||||||
RunModerationRequest:
|
RunModerationRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -9119,61 +8863,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- data
|
- data
|
||||||
title: ListScoringFunctionsResponse
|
title: ListScoringFunctionsResponse
|
||||||
ParamType:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/StringType'
|
|
||||||
- $ref: '#/components/schemas/NumberType'
|
|
||||||
- $ref: '#/components/schemas/BooleanType'
|
|
||||||
- $ref: '#/components/schemas/ArrayType'
|
|
||||||
- $ref: '#/components/schemas/ObjectType'
|
|
||||||
- $ref: '#/components/schemas/JsonType'
|
|
||||||
- $ref: '#/components/schemas/UnionType'
|
|
||||||
- $ref: '#/components/schemas/ChatCompletionInputType'
|
|
||||||
- $ref: '#/components/schemas/CompletionInputType'
|
|
||||||
discriminator:
|
|
||||||
propertyName: type
|
|
||||||
mapping:
|
|
||||||
string: '#/components/schemas/StringType'
|
|
||||||
number: '#/components/schemas/NumberType'
|
|
||||||
boolean: '#/components/schemas/BooleanType'
|
|
||||||
array: '#/components/schemas/ArrayType'
|
|
||||||
object: '#/components/schemas/ObjectType'
|
|
||||||
json: '#/components/schemas/JsonType'
|
|
||||||
union: '#/components/schemas/UnionType'
|
|
||||||
chat_completion_input: '#/components/schemas/ChatCompletionInputType'
|
|
||||||
completion_input: '#/components/schemas/CompletionInputType'
|
|
||||||
RegisterScoringFunctionRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
scoring_fn_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the scoring function to register.
|
|
||||||
description:
|
|
||||||
type: string
|
|
||||||
description: The description of the scoring function.
|
|
||||||
return_type:
|
|
||||||
$ref: '#/components/schemas/ParamType'
|
|
||||||
description: The return type of the scoring function.
|
|
||||||
provider_scoring_fn_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the provider scoring function to use for the scoring function.
|
|
||||||
provider_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the provider to use for the scoring function.
|
|
||||||
params:
|
|
||||||
$ref: '#/components/schemas/ScoringFnParams'
|
|
||||||
description: >-
|
|
||||||
The parameters for the scoring function for benchmark eval, these can
|
|
||||||
be overridden for app eval.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- scoring_fn_id
|
|
||||||
- description
|
|
||||||
- return_type
|
|
||||||
title: RegisterScoringFunctionRequest
|
|
||||||
ScoreRequest:
|
ScoreRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -9349,35 +9038,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- data
|
- data
|
||||||
title: ListShieldsResponse
|
title: ListShieldsResponse
|
||||||
RegisterShieldRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
shield_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The identifier of the shield to register.
|
|
||||||
provider_shield_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The identifier of the shield in the provider.
|
|
||||||
provider_id:
|
|
||||||
type: string
|
|
||||||
description: The identifier of the provider.
|
|
||||||
params:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: The parameters of the shield.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- shield_id
|
|
||||||
title: RegisterShieldRequest
|
|
||||||
InvokeToolRequest:
|
InvokeToolRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -9638,37 +9298,6 @@ components:
|
||||||
title: ListToolGroupsResponse
|
title: ListToolGroupsResponse
|
||||||
description: >-
|
description: >-
|
||||||
Response containing a list of tool groups.
|
Response containing a list of tool groups.
|
||||||
RegisterToolGroupRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
toolgroup_id:
|
|
||||||
type: string
|
|
||||||
description: The ID of the tool group to register.
|
|
||||||
provider_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the provider to use for the tool group.
|
|
||||||
mcp_endpoint:
|
|
||||||
$ref: '#/components/schemas/URL'
|
|
||||||
description: >-
|
|
||||||
The MCP endpoint to use for the tool group.
|
|
||||||
args:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: >-
|
|
||||||
A dictionary of arguments to pass to the tool group.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- toolgroup_id
|
|
||||||
- provider_id
|
|
||||||
title: RegisterToolGroupRequest
|
|
||||||
Chunk:
|
Chunk:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -10469,41 +10098,35 @@ components:
|
||||||
title: VectorStoreContent
|
title: VectorStoreContent
|
||||||
description: >-
|
description: >-
|
||||||
Content item from a vector store file or search result.
|
Content item from a vector store file or search result.
|
||||||
VectorStoreFileContentsResponse:
|
VectorStoreFileContentResponse:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
file_id:
|
object:
|
||||||
type: string
|
type: string
|
||||||
description: Unique identifier for the file
|
const: vector_store.file_content.page
|
||||||
filename:
|
default: vector_store.file_content.page
|
||||||
type: string
|
|
||||||
description: Name of the file
|
|
||||||
attributes:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: >-
|
description: >-
|
||||||
Key-value attributes associated with the file
|
The object type, which is always `vector_store.file_content.page`
|
||||||
content:
|
data:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/VectorStoreContent'
|
$ref: '#/components/schemas/VectorStoreContent'
|
||||||
description: List of content items from the file
|
description: Parsed content of the file
|
||||||
|
has_more:
|
||||||
|
type: boolean
|
||||||
|
description: >-
|
||||||
|
Indicates if there are more content pages to fetch
|
||||||
|
next_page:
|
||||||
|
type: string
|
||||||
|
description: The token for the next page, if any
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- file_id
|
- object
|
||||||
- filename
|
- data
|
||||||
- attributes
|
- has_more
|
||||||
- content
|
title: VectorStoreFileContentResponse
|
||||||
title: VectorStoreFileContentsResponse
|
|
||||||
description: >-
|
description: >-
|
||||||
Response from retrieving the contents of a vector store file.
|
Represents the parsed content of a vector store file.
|
||||||
OpenaiSearchVectorStoreRequest:
|
OpenaiSearchVectorStoreRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -10820,68 +10443,6 @@ components:
|
||||||
- data
|
- data
|
||||||
title: ListDatasetsResponse
|
title: ListDatasetsResponse
|
||||||
description: Response from listing datasets.
|
description: Response from listing datasets.
|
||||||
DataSource:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/URIDataSource'
|
|
||||||
- $ref: '#/components/schemas/RowsDataSource'
|
|
||||||
discriminator:
|
|
||||||
propertyName: type
|
|
||||||
mapping:
|
|
||||||
uri: '#/components/schemas/URIDataSource'
|
|
||||||
rows: '#/components/schemas/RowsDataSource'
|
|
||||||
RegisterDatasetRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
purpose:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- post-training/messages
|
|
||||||
- eval/question-answer
|
|
||||||
- eval/messages-answer
|
|
||||||
description: >-
|
|
||||||
The purpose of the dataset. One of: - "post-training/messages": The dataset
|
|
||||||
contains a messages column with list of messages for post-training. {
|
|
||||||
"messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
|
|
||||||
"content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
|
|
||||||
contains a question column and an answer column for evaluation. { "question":
|
|
||||||
"What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
|
|
||||||
The dataset contains a messages column with list of messages and an answer
|
|
||||||
column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
|
|
||||||
my name is John Doe."}, {"role": "assistant", "content": "Hello, John
|
|
||||||
Doe. How can I help you today?"}, {"role": "user", "content": "What's
|
|
||||||
my name?"}, ], "answer": "John Doe" }
|
|
||||||
source:
|
|
||||||
$ref: '#/components/schemas/DataSource'
|
|
||||||
description: >-
|
|
||||||
The data source of the dataset. Ensure that the data source schema is
|
|
||||||
compatible with the purpose of the dataset. Examples: - { "type": "uri",
|
|
||||||
"uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
|
|
||||||
"lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
|
|
||||||
} - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
|
|
||||||
} - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
|
|
||||||
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
|
|
||||||
} ] }
|
|
||||||
metadata:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: >-
|
|
||||||
The metadata for the dataset. - E.g. {"description": "My dataset"}.
|
|
||||||
dataset_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the dataset. If not provided, an ID will be generated.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- purpose
|
|
||||||
- source
|
|
||||||
title: RegisterDatasetRequest
|
|
||||||
Benchmark:
|
Benchmark:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -10949,47 +10510,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- data
|
- data
|
||||||
title: ListBenchmarksResponse
|
title: ListBenchmarksResponse
|
||||||
RegisterBenchmarkRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
benchmark_id:
|
|
||||||
type: string
|
|
||||||
description: The ID of the benchmark to register.
|
|
||||||
dataset_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the dataset to use for the benchmark.
|
|
||||||
scoring_functions:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The scoring functions to use for the benchmark.
|
|
||||||
provider_benchmark_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the provider benchmark to use for the benchmark.
|
|
||||||
provider_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the provider to use for the benchmark.
|
|
||||||
metadata:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: The metadata to use for the benchmark.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- benchmark_id
|
|
||||||
- dataset_id
|
|
||||||
- scoring_functions
|
|
||||||
title: RegisterBenchmarkRequest
|
|
||||||
BenchmarkConfig:
|
BenchmarkConfig:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -11851,6 +11371,109 @@ components:
|
||||||
- hyperparam_search_config
|
- hyperparam_search_config
|
||||||
- logger_config
|
- logger_config
|
||||||
title: SupervisedFineTuneRequest
|
title: SupervisedFineTuneRequest
|
||||||
|
DataSource:
|
||||||
|
oneOf:
|
||||||
|
- $ref: '#/components/schemas/URIDataSource'
|
||||||
|
- $ref: '#/components/schemas/RowsDataSource'
|
||||||
|
discriminator:
|
||||||
|
propertyName: type
|
||||||
|
mapping:
|
||||||
|
uri: '#/components/schemas/URIDataSource'
|
||||||
|
rows: '#/components/schemas/RowsDataSource'
|
||||||
|
RegisterDatasetRequest:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
purpose:
|
||||||
|
type: string
|
||||||
|
enum:
|
||||||
|
- post-training/messages
|
||||||
|
- eval/question-answer
|
||||||
|
- eval/messages-answer
|
||||||
|
description: >-
|
||||||
|
The purpose of the dataset. One of: - "post-training/messages": The dataset
|
||||||
|
contains a messages column with list of messages for post-training. {
|
||||||
|
"messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
|
||||||
|
"content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
|
||||||
|
contains a question column and an answer column for evaluation. { "question":
|
||||||
|
"What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
|
||||||
|
The dataset contains a messages column with list of messages and an answer
|
||||||
|
column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
|
||||||
|
my name is John Doe."}, {"role": "assistant", "content": "Hello, John
|
||||||
|
Doe. How can I help you today?"}, {"role": "user", "content": "What's
|
||||||
|
my name?"}, ], "answer": "John Doe" }
|
||||||
|
source:
|
||||||
|
$ref: '#/components/schemas/DataSource'
|
||||||
|
description: >-
|
||||||
|
The data source of the dataset. Ensure that the data source schema is
|
||||||
|
compatible with the purpose of the dataset. Examples: - { "type": "uri",
|
||||||
|
"uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
|
||||||
|
"lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
|
||||||
|
} - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
|
||||||
|
} - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
|
||||||
|
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
|
||||||
|
} ] }
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
description: >-
|
||||||
|
The metadata for the dataset. - E.g. {"description": "My dataset"}.
|
||||||
|
dataset_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ID of the dataset. If not provided, an ID will be generated.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- purpose
|
||||||
|
- source
|
||||||
|
title: RegisterDatasetRequest
|
||||||
|
RegisterBenchmarkRequest:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
benchmark_id:
|
||||||
|
type: string
|
||||||
|
description: The ID of the benchmark to register.
|
||||||
|
dataset_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ID of the dataset to use for the benchmark.
|
||||||
|
scoring_functions:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The scoring functions to use for the benchmark.
|
||||||
|
provider_benchmark_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ID of the provider benchmark to use for the benchmark.
|
||||||
|
provider_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ID of the provider to use for the benchmark.
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
description: The metadata to use for the benchmark.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- benchmark_id
|
||||||
|
- dataset_id
|
||||||
|
- scoring_functions
|
||||||
|
title: RegisterBenchmarkRequest
|
||||||
responses:
|
responses:
|
||||||
BadRequest400:
|
BadRequest400:
|
||||||
description: The request was invalid or malformed
|
description: The request was invalid or malformed
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# Kubernetes Deployment Guide
|
# Kubernetes Deployment Guide
|
||||||
|
|
||||||
Deploy Llama Stack and vLLM servers in a Kubernetes cluster instead of running them locally. This guide covers both local development with Kind and production deployment on AWS EKS.
|
Deploy Llama Stack and vLLM servers in a Kubernetes cluster instead of running them locally. This guide covers deployment using the Kubernetes operator to manage the Llama Stack server with Kind. The vLLM inference server is deployed manually.
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
|
|
@ -110,115 +110,176 @@ spec:
|
||||||
EOF
|
EOF
|
||||||
```
|
```
|
||||||
|
|
||||||
### Step 3: Configure Llama Stack
|
### Step 3: Install Kubernetes Operator
|
||||||
|
|
||||||
Update your run configuration:
|
Install the Llama Stack Kubernetes operator to manage Llama Stack deployments:
|
||||||
|
|
||||||
```yaml
|
|
||||||
providers:
|
|
||||||
inference:
|
|
||||||
- provider_id: vllm
|
|
||||||
provider_type: remote::vllm
|
|
||||||
config:
|
|
||||||
url: http://vllm-server.default.svc.cluster.local:8000/v1
|
|
||||||
max_tokens: 4096
|
|
||||||
api_token: fake
|
|
||||||
```
|
|
||||||
|
|
||||||
Build container image:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
tmp_dir=$(mktemp -d) && cat >$tmp_dir/Containerfile.llama-stack-run-k8s <<EOF
|
# Install from the latest main branch
|
||||||
FROM distribution-myenv:dev
|
kubectl apply -f https://raw.githubusercontent.com/llamastack/llama-stack-k8s-operator/main/release/operator.yaml
|
||||||
RUN apt-get update && apt-get install -y git
|
|
||||||
RUN git clone https://github.com/meta-llama/llama-stack.git /app/llama-stack-source
|
# Or install a specific version (e.g., v0.4.0)
|
||||||
ADD ./vllm-llama-stack-run-k8s.yaml /app/config.yaml
|
# kubectl apply -f https://raw.githubusercontent.com/llamastack/llama-stack-k8s-operator/v0.4.0/release/operator.yaml
|
||||||
EOF
|
|
||||||
podman build -f $tmp_dir/Containerfile.llama-stack-run-k8s -t llama-stack-run-k8s $tmp_dir
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Step 4: Deploy Llama Stack Server
|
Verify the operator is running:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl get pods -n llama-stack-operator-system
|
||||||
|
```
|
||||||
|
|
||||||
|
For more information about the operator, see the [llama-stack-k8s-operator repository](https://github.com/llamastack/llama-stack-k8s-operator).
|
||||||
|
|
||||||
|
### Step 4: Deploy Llama Stack Server using Operator
|
||||||
|
|
||||||
|
Create a `LlamaStackDistribution` custom resource to deploy the Llama Stack server. The operator will automatically create the necessary Deployment, Service, and other resources.
|
||||||
|
You can optionally override the default `run.yaml` using `spec.server.userConfig` with a ConfigMap (see [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec)).
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
cat <<EOF | kubectl apply -f -
|
cat <<EOF | kubectl apply -f -
|
||||||
apiVersion: v1
|
apiVersion: llamastack.io/v1alpha1
|
||||||
kind: PersistentVolumeClaim
|
kind: LlamaStackDistribution
|
||||||
metadata:
|
metadata:
|
||||||
name: llama-pvc
|
name: llamastack-vllm
|
||||||
spec:
|
|
||||||
accessModes:
|
|
||||||
- ReadWriteOnce
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
storage: 1Gi
|
|
||||||
---
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: llama-stack-server
|
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
server:
|
||||||
matchLabels:
|
distribution:
|
||||||
app.kubernetes.io/name: llama-stack
|
name: starter
|
||||||
template:
|
containerSpec:
|
||||||
metadata:
|
port: 8321
|
||||||
labels:
|
env:
|
||||||
app.kubernetes.io/name: llama-stack
|
- name: VLLM_URL
|
||||||
spec:
|
value: "http://vllm-server.default.svc.cluster.local:8000/v1"
|
||||||
containers:
|
- name: VLLM_MAX_TOKENS
|
||||||
- name: llama-stack
|
value: "4096"
|
||||||
image: localhost/llama-stack-run-k8s:latest
|
- name: VLLM_API_TOKEN
|
||||||
imagePullPolicy: IfNotPresent
|
value: "fake"
|
||||||
command: ["llama", "stack", "run", "/app/config.yaml"]
|
# Optional: override run.yaml from a ConfigMap using userConfig
|
||||||
ports:
|
userConfig:
|
||||||
- containerPort: 5000
|
configMap:
|
||||||
volumeMounts:
|
name: llama-stack-config
|
||||||
- name: llama-storage
|
storage:
|
||||||
mountPath: /root/.llama
|
size: "20Gi"
|
||||||
volumes:
|
mountPath: "/home/lls/.lls"
|
||||||
- name: llama-storage
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: llama-pvc
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: llama-stack-service
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: llama-stack
|
|
||||||
ports:
|
|
||||||
- protocol: TCP
|
|
||||||
port: 5000
|
|
||||||
targetPort: 5000
|
|
||||||
type: ClusterIP
|
|
||||||
EOF
|
EOF
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Configuration Options:**
|
||||||
|
|
||||||
|
- `replicas`: Number of Llama Stack server instances to run
|
||||||
|
- `server.distribution.name`: The distribution to use (e.g., `starter` for the starter distribution). See the [list of supported distributions](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/distributions.json) in the operator repository.
|
||||||
|
- `server.distribution.image`: (Optional) Custom container image for non-supported distributions. Use this field when deploying a distribution that is not in the supported list. If specified, this takes precedence over `name`.
|
||||||
|
- `server.containerSpec.port`: Port on which the Llama Stack server listens (default: 8321)
|
||||||
|
- `server.containerSpec.env`: Environment variables to configure providers (e.g., `VLLM_URL`, `VLLM_MAX_TOKENS`, `VLLM_API_TOKEN`)
|
||||||
|
- `server.userConfig`: (Optional) Override the default `run.yaml` using a ConfigMap. See [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec).
|
||||||
|
- `server.storage.size`: Size of the persistent volume for model and data storage
|
||||||
|
- `server.storage.mountPath`: Where to mount the storage in the container
|
||||||
|
|
||||||
|
**Note:** For a complete list of supported distributions, see [distributions.json](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/distributions.json) in the operator repository. To use a custom or non-supported distribution, set the `server.distribution.image` field with your container image instead of `server.distribution.name`.
|
||||||
|
|
||||||
|
The operator automatically creates:
|
||||||
|
- A Deployment for the Llama Stack server
|
||||||
|
- A Service to access the server
|
||||||
|
- A PersistentVolumeClaim for storage
|
||||||
|
- All necessary RBAC resources
|
||||||
|
|
||||||
|
|
||||||
|
Check the status of your deployment:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl get llamastackdistribution
|
||||||
|
kubectl describe llamastackdistribution llamastack-vllm
|
||||||
|
```
|
||||||
|
|
||||||
### Step 5: Test Deployment
|
### Step 5: Test Deployment
|
||||||
|
|
||||||
|
Wait for the Llama Stack server pod to be ready:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Port forward and test
|
# Check the status of the LlamaStackDistribution
|
||||||
kubectl port-forward service/llama-stack-service 5000:5000
|
kubectl get llamastackdistribution llamastack-vllm
|
||||||
llama-stack-client --endpoint http://localhost:5000 inference chat-completion --message "hello, what model are you?"
|
|
||||||
|
# Check the pods created by the operator
|
||||||
|
kubectl get pods -l app.kubernetes.io/name=llama-stack
|
||||||
|
|
||||||
|
# Wait for the pod to be ready
|
||||||
|
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=llama-stack --timeout=300s
|
||||||
|
```
|
||||||
|
|
||||||
|
Get the service name created by the operator (it typically follows the pattern `<llamastackdistribution-name>-service`):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# List services to find the service name
|
||||||
|
kubectl get services | grep llamastack
|
||||||
|
|
||||||
|
# Port forward and test (replace llamastack-vllm-service with the actual service name if it differs)
|
||||||
|
kubectl port-forward service/llamastack-vllm-service 8321:8321
|
||||||
|
```
|
||||||
|
|
||||||
|
In another terminal, test the deployment:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
llama-stack-client --endpoint http://localhost:8321 inference chat-completion --message "hello, what model are you?"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
**Check pod status:**
|
### vLLM Server Issues
|
||||||
|
|
||||||
|
**Check vLLM pod status:**
|
||||||
```bash
|
```bash
|
||||||
kubectl get pods -l app.kubernetes.io/name=vllm
|
kubectl get pods -l app.kubernetes.io/name=vllm
|
||||||
kubectl logs -l app.kubernetes.io/name=vllm
|
kubectl logs -l app.kubernetes.io/name=vllm
|
||||||
```
|
```
|
||||||
|
|
||||||
**Test service connectivity:**
|
**Test vLLM service connectivity:**
|
||||||
```bash
|
```bash
|
||||||
kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- curl http://vllm-server:8000/v1/models
|
kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- curl http://vllm-server:8000/v1/models
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Llama Stack Server Issues
|
||||||
|
|
||||||
|
**Check LlamaStackDistribution status:**
|
||||||
|
```bash
|
||||||
|
# Get detailed status
|
||||||
|
kubectl describe llamastackdistribution llamastack-vllm
|
||||||
|
|
||||||
|
# Check for events
|
||||||
|
kubectl get events --sort-by='.lastTimestamp' | grep llamastack-vllm
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check operator-managed pods:**
|
||||||
|
```bash
|
||||||
|
# List all pods managed by the operator
|
||||||
|
kubectl get pods -l app.kubernetes.io/name=llama-stack
|
||||||
|
|
||||||
|
# Check logs of the pods selected by the label
|
||||||
|
kubectl logs -l app.kubernetes.io/name=llama-stack
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check operator status:**
|
||||||
|
```bash
|
||||||
|
# Verify the operator is running
|
||||||
|
kubectl get pods -n llama-stack-operator-system
|
||||||
|
|
||||||
|
# Check operator logs if issues persist
|
||||||
|
kubectl logs -n llama-stack-operator-system -l control-plane=controller-manager
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify service connectivity:**
|
||||||
|
```bash
|
||||||
|
# Get the service endpoint
|
||||||
|
kubectl get svc llamastack-vllm-service
|
||||||
|
|
||||||
|
# Test connectivity from within the cluster
|
||||||
|
kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- curl http://llamastack-vllm-service:8321/health
|
||||||
|
```
|
||||||
|
|
||||||
## Related Resources
|
## Related Resources
|
||||||
|
|
||||||
- **[Deployment Overview](/docs/deploying/)** - Overview of deployment options
|
- **[Deployment Overview](/docs/deploying/)** - Overview of deployment options
|
||||||
- **[Distributions](/docs/distributions)** - Understanding Llama Stack distributions
|
- **[Distributions](/docs/distributions)** - Understanding Llama Stack distributions
|
||||||
- **[Configuration](/docs/distributions/configuration)** - Detailed configuration options
|
- **[Configuration](/docs/distributions/configuration)** - Detailed configuration options
|
||||||
|
- **[LlamaStack Operator](https://github.com/llamastack/llama-stack-k8s-operator)** - Overview of llama-stack kubernetes operator
|
||||||
|
- **[LlamaStackDistribution](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md)** - API Spec of the llama-stack operator Custom Resource.
|
||||||
|
|
|
||||||
143
docs/docs/distributions/remote_hosted_distro/oci.md
Normal file
143
docs/docs/distributions/remote_hosted_distro/oci.md
Normal file
|
|
@ -0,0 +1,143 @@
|
||||||
|
---
|
||||||
|
orphan: true
|
||||||
|
---
|
||||||
|
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
|
||||||
|
# OCI Distribution
|
||||||
|
|
||||||
|
The `llamastack/distribution-oci` distribution consists of the following provider configurations.
|
||||||
|
|
||||||
|
| API | Provider(s) |
|
||||||
|
|-----|-------------|
|
||||||
|
| agents | `inline::meta-reference` |
|
||||||
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
|
| eval | `inline::meta-reference` |
|
||||||
|
| files | `inline::localfs` |
|
||||||
|
| inference | `remote::oci` |
|
||||||
|
| safety | `inline::llama-guard` |
|
||||||
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
|
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` |
|
||||||
|
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
|
|
||||||
|
|
||||||
|
### Environment Variables
|
||||||
|
|
||||||
|
The following environment variables can be configured:
|
||||||
|
|
||||||
|
- `OCI_AUTH_TYPE`: OCI authentication type (instance_principal or config_file) (default: `instance_principal`)
|
||||||
|
- `OCI_REGION`: OCI region (e.g., us-ashburn-1, us-chicago-1, us-phoenix-1, eu-frankfurt-1) (default: ``)
|
||||||
|
- `OCI_COMPARTMENT_OCID`: OCI compartment ID for the Generative AI service (default: ``)
|
||||||
|
- `OCI_CONFIG_FILE_PATH`: OCI config file path (required if OCI_AUTH_TYPE is config_file) (default: `~/.oci/config`)
|
||||||
|
- `OCI_CLI_PROFILE`: OCI CLI profile name to use from config file (default: `DEFAULT`)
|
||||||
|
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
### Oracle Cloud Infrastructure Setup
|
||||||
|
|
||||||
|
Before using the OCI Generative AI distribution, ensure you have:
|
||||||
|
|
||||||
|
1. **Oracle Cloud Infrastructure Account**: Sign up at [Oracle Cloud Infrastructure](https://cloud.oracle.com/)
|
||||||
|
2. **Generative AI Service Access**: Enable the Generative AI service in your OCI tenancy
|
||||||
|
3. **Compartment**: Create or identify a compartment where you'll deploy Generative AI models
|
||||||
|
4. **Authentication**: Configure authentication using either:
|
||||||
|
- **Instance Principal** (recommended for cloud-hosted deployments)
|
||||||
|
- **API Key** (for on-premises or development environments)
|
||||||
|
|
||||||
|
### Authentication Methods
|
||||||
|
|
||||||
|
#### Instance Principal Authentication (Recommended)
|
||||||
|
Instance Principal authentication allows OCI resources to authenticate using the identity of the compute instance they're running on. This is the most secure method for production deployments.
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Instance must be running in an Oracle Cloud Infrastructure compartment
|
||||||
|
- Instance must have appropriate IAM policies to access Generative AI services
|
||||||
|
|
||||||
|
#### API Key Authentication
|
||||||
|
For development or on-premises deployments, follow [this doc](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/apisigningkey.htm) to learn how to create your API signing key for your config file.
|
||||||
|
|
||||||
|
### Required IAM Policies
|
||||||
|
|
||||||
|
Ensure your OCI user or instance has the following policy statements:
|
||||||
|
|
||||||
|
```
|
||||||
|
Allow group <group_name> to use generative-ai-inference-endpoints in compartment <compartment_name>
|
||||||
|
Allow group <group_name> to manage generative-ai-inference-endpoints in compartment <compartment_name>
|
||||||
|
```
|
||||||
|
|
||||||
|
## Supported Services
|
||||||
|
|
||||||
|
### Inference: OCI Generative AI
|
||||||
|
Oracle Cloud Infrastructure Generative AI provides access to high-performance AI models through OCI's Platform-as-a-Service offering. The service supports:
|
||||||
|
|
||||||
|
- **Chat Completions**: Conversational AI with context awareness
|
||||||
|
- **Text Generation**: Complete prompts and generate text content
|
||||||
|
|
||||||
|
#### Available Models
|
||||||
|
OCI Generative AI provides access to models from Meta, Cohere, OpenAI, Grok, and more.
|
||||||
|
|
||||||
|
### Safety: Llama Guard
|
||||||
|
For content safety and moderation, this distribution uses Meta's LlamaGuard model through the OCI Generative AI service to provide:
|
||||||
|
- Content filtering and moderation
|
||||||
|
- Policy compliance checking
|
||||||
|
- Harmful content detection
|
||||||
|
|
||||||
|
### Vector Storage: Multiple Options
|
||||||
|
The distribution supports several vector storage providers:
|
||||||
|
- **FAISS**: Local in-memory vector search
|
||||||
|
- **ChromaDB**: Distributed vector database
|
||||||
|
- **PGVector**: PostgreSQL with vector extensions
|
||||||
|
|
||||||
|
### Additional Services
|
||||||
|
- **Dataset I/O**: Local filesystem and Hugging Face integration
|
||||||
|
- **Tool Runtime**: Web search (Brave, Tavily) and RAG capabilities
|
||||||
|
- **Evaluation**: Meta reference evaluation framework
|
||||||
|
|
||||||
|
## Running Llama Stack with OCI
|
||||||
|
|
||||||
|
You can run the OCI distribution via Docker or local virtual environment.
|
||||||
|
|
||||||
|
### Via venv
|
||||||
|
|
||||||
|
If you've set up your local development environment, you can also run the distribution using your local virtual environment.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
OCI_AUTH_TYPE=$OCI_AUTH_TYPE OCI_REGION=$OCI_REGION OCI_COMPARTMENT_OCID=$OCI_COMPARTMENT_OCID llama stack run --port 8321 oci
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configuration Examples
|
||||||
|
|
||||||
|
#### Using Instance Principal (Recommended for Production)
|
||||||
|
```bash
|
||||||
|
export OCI_AUTH_TYPE=instance_principal
|
||||||
|
export OCI_REGION=us-chicago-1
|
||||||
|
export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Using API Key Authentication (Development)
|
||||||
|
```bash
|
||||||
|
export OCI_AUTH_TYPE=config_file
|
||||||
|
export OCI_CONFIG_FILE_PATH=~/.oci/config
|
||||||
|
export OCI_CLI_PROFILE=DEFAULT
|
||||||
|
export OCI_REGION=us-chicago-1
|
||||||
|
export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
|
||||||
|
```
|
||||||
|
|
||||||
|
## Regional Endpoints
|
||||||
|
|
||||||
|
OCI Generative AI is available in multiple regions. The service automatically routes to the appropriate regional endpoint based on your configuration. For a full list of regional model availability, visit:
|
||||||
|
|
||||||
|
https://docs.oracle.com/en-us/iaas/Content/generative-ai/overview.htm#regions
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Common Issues
|
||||||
|
|
||||||
|
1. **Authentication Errors**: Verify your OCI credentials and IAM policies
|
||||||
|
2. **Model Not Found**: Ensure the model OCID is correct and the model is available in your region
|
||||||
|
3. **Permission Denied**: Check compartment permissions and Generative AI service access
|
||||||
|
4. **Region Unavailable**: Verify the specified region supports Generative AI services
|
||||||
|
|
||||||
|
### Getting Help
|
||||||
|
|
||||||
|
For additional support:
|
||||||
|
- [OCI Generative AI Documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm)
|
||||||
|
- [Llama Stack Issues](https://github.com/meta-llama/llama-stack/issues)
|
||||||
41
docs/docs/providers/inference/remote_oci.mdx
Normal file
41
docs/docs/providers/inference/remote_oci.mdx
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
---
|
||||||
|
description: |
|
||||||
|
Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models.
|
||||||
|
Provider documentation
|
||||||
|
https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm
|
||||||
|
sidebar_label: Remote - Oci
|
||||||
|
title: remote::oci
|
||||||
|
---
|
||||||
|
|
||||||
|
# remote::oci
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
|
||||||
|
Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models.
|
||||||
|
Provider documentation
|
||||||
|
https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm
|
||||||
|
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||||
|
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
|
||||||
|
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider |
|
||||||
|
| `oci_auth_type` | `<class 'str'>` | No | instance_principal | OCI authentication type (must be one of: instance_principal, config_file) |
|
||||||
|
| `oci_region` | `<class 'str'>` | No | us-ashburn-1 | OCI region (e.g., us-ashburn-1) |
|
||||||
|
| `oci_compartment_id` | `<class 'str'>` | No | | OCI compartment ID for the Generative AI service |
|
||||||
|
| `oci_config_file_path` | `<class 'str'>` | No | ~/.oci/config | OCI config file path (required if oci_auth_type is config_file) |
|
||||||
|
| `oci_config_profile` | `<class 'str'>` | No | DEFAULT | OCI config profile (required if oci_auth_type is config_file) |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
oci_auth_type: ${env.OCI_AUTH_TYPE:=instance_principal}
|
||||||
|
oci_config_file_path: ${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}
|
||||||
|
oci_config_profile: ${env.OCI_CLI_PROFILE:=DEFAULT}
|
||||||
|
oci_region: ${env.OCI_REGION:=us-ashburn-1}
|
||||||
|
oci_compartment_id: ${env.OCI_COMPARTMENT_OCID:=}
|
||||||
|
```
|
||||||
1094
docs/static/deprecated-llama-stack-spec.yaml
vendored
1094
docs/static/deprecated-llama-stack-spec.yaml
vendored
File diff suppressed because it is too large
Load diff
214
docs/static/experimental-llama-stack-spec.yaml
vendored
214
docs/static/experimental-llama-stack-spec.yaml
vendored
|
|
@ -162,7 +162,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/RegisterDatasetRequest'
|
$ref: '#/components/schemas/RegisterDatasetRequest'
|
||||||
required: true
|
required: true
|
||||||
deprecated: false
|
deprecated: true
|
||||||
/v1beta/datasets/{dataset_id}:
|
/v1beta/datasets/{dataset_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -219,7 +219,7 @@ paths:
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: true
|
||||||
/v1alpha/eval/benchmarks:
|
/v1alpha/eval/benchmarks:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -270,7 +270,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/RegisterBenchmarkRequest'
|
$ref: '#/components/schemas/RegisterBenchmarkRequest'
|
||||||
required: true
|
required: true
|
||||||
deprecated: false
|
deprecated: true
|
||||||
/v1alpha/eval/benchmarks/{benchmark_id}:
|
/v1alpha/eval/benchmarks/{benchmark_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -327,7 +327,7 @@ paths:
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: true
|
||||||
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
|
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -936,68 +936,6 @@ components:
|
||||||
- data
|
- data
|
||||||
title: ListDatasetsResponse
|
title: ListDatasetsResponse
|
||||||
description: Response from listing datasets.
|
description: Response from listing datasets.
|
||||||
DataSource:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/URIDataSource'
|
|
||||||
- $ref: '#/components/schemas/RowsDataSource'
|
|
||||||
discriminator:
|
|
||||||
propertyName: type
|
|
||||||
mapping:
|
|
||||||
uri: '#/components/schemas/URIDataSource'
|
|
||||||
rows: '#/components/schemas/RowsDataSource'
|
|
||||||
RegisterDatasetRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
purpose:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- post-training/messages
|
|
||||||
- eval/question-answer
|
|
||||||
- eval/messages-answer
|
|
||||||
description: >-
|
|
||||||
The purpose of the dataset. One of: - "post-training/messages": The dataset
|
|
||||||
contains a messages column with list of messages for post-training. {
|
|
||||||
"messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
|
|
||||||
"content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
|
|
||||||
contains a question column and an answer column for evaluation. { "question":
|
|
||||||
"What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
|
|
||||||
The dataset contains a messages column with list of messages and an answer
|
|
||||||
column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
|
|
||||||
my name is John Doe."}, {"role": "assistant", "content": "Hello, John
|
|
||||||
Doe. How can I help you today?"}, {"role": "user", "content": "What's
|
|
||||||
my name?"}, ], "answer": "John Doe" }
|
|
||||||
source:
|
|
||||||
$ref: '#/components/schemas/DataSource'
|
|
||||||
description: >-
|
|
||||||
The data source of the dataset. Ensure that the data source schema is
|
|
||||||
compatible with the purpose of the dataset. Examples: - { "type": "uri",
|
|
||||||
"uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
|
|
||||||
"lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
|
|
||||||
} - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
|
|
||||||
} - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
|
|
||||||
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
|
|
||||||
} ] }
|
|
||||||
metadata:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: >-
|
|
||||||
The metadata for the dataset. - E.g. {"description": "My dataset"}.
|
|
||||||
dataset_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the dataset. If not provided, an ID will be generated.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- purpose
|
|
||||||
- source
|
|
||||||
title: RegisterDatasetRequest
|
|
||||||
Benchmark:
|
Benchmark:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -1065,47 +1003,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- data
|
- data
|
||||||
title: ListBenchmarksResponse
|
title: ListBenchmarksResponse
|
||||||
RegisterBenchmarkRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
benchmark_id:
|
|
||||||
type: string
|
|
||||||
description: The ID of the benchmark to register.
|
|
||||||
dataset_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the dataset to use for the benchmark.
|
|
||||||
scoring_functions:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The scoring functions to use for the benchmark.
|
|
||||||
provider_benchmark_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the provider benchmark to use for the benchmark.
|
|
||||||
provider_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the provider to use for the benchmark.
|
|
||||||
metadata:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: The metadata to use for the benchmark.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- benchmark_id
|
|
||||||
- dataset_id
|
|
||||||
- scoring_functions
|
|
||||||
title: RegisterBenchmarkRequest
|
|
||||||
AggregationFunctionType:
|
AggregationFunctionType:
|
||||||
type: string
|
type: string
|
||||||
enum:
|
enum:
|
||||||
|
|
@ -2254,6 +2151,109 @@ components:
|
||||||
- hyperparam_search_config
|
- hyperparam_search_config
|
||||||
- logger_config
|
- logger_config
|
||||||
title: SupervisedFineTuneRequest
|
title: SupervisedFineTuneRequest
|
||||||
|
DataSource:
|
||||||
|
oneOf:
|
||||||
|
- $ref: '#/components/schemas/URIDataSource'
|
||||||
|
- $ref: '#/components/schemas/RowsDataSource'
|
||||||
|
discriminator:
|
||||||
|
propertyName: type
|
||||||
|
mapping:
|
||||||
|
uri: '#/components/schemas/URIDataSource'
|
||||||
|
rows: '#/components/schemas/RowsDataSource'
|
||||||
|
RegisterDatasetRequest:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
purpose:
|
||||||
|
type: string
|
||||||
|
enum:
|
||||||
|
- post-training/messages
|
||||||
|
- eval/question-answer
|
||||||
|
- eval/messages-answer
|
||||||
|
description: >-
|
||||||
|
The purpose of the dataset. One of: - "post-training/messages": The dataset
|
||||||
|
contains a messages column with list of messages for post-training. {
|
||||||
|
"messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
|
||||||
|
"content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
|
||||||
|
contains a question column and an answer column for evaluation. { "question":
|
||||||
|
"What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
|
||||||
|
The dataset contains a messages column with list of messages and an answer
|
||||||
|
column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
|
||||||
|
my name is John Doe."}, {"role": "assistant", "content": "Hello, John
|
||||||
|
Doe. How can I help you today?"}, {"role": "user", "content": "What's
|
||||||
|
my name?"}, ], "answer": "John Doe" }
|
||||||
|
source:
|
||||||
|
$ref: '#/components/schemas/DataSource'
|
||||||
|
description: >-
|
||||||
|
The data source of the dataset. Ensure that the data source schema is
|
||||||
|
compatible with the purpose of the dataset. Examples: - { "type": "uri",
|
||||||
|
"uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
|
||||||
|
"lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
|
||||||
|
} - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
|
||||||
|
} - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
|
||||||
|
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
|
||||||
|
} ] }
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
description: >-
|
||||||
|
The metadata for the dataset. - E.g. {"description": "My dataset"}.
|
||||||
|
dataset_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ID of the dataset. If not provided, an ID will be generated.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- purpose
|
||||||
|
- source
|
||||||
|
title: RegisterDatasetRequest
|
||||||
|
RegisterBenchmarkRequest:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
benchmark_id:
|
||||||
|
type: string
|
||||||
|
description: The ID of the benchmark to register.
|
||||||
|
dataset_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ID of the dataset to use for the benchmark.
|
||||||
|
scoring_functions:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The scoring functions to use for the benchmark.
|
||||||
|
provider_benchmark_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ID of the provider benchmark to use for the benchmark.
|
||||||
|
provider_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ID of the provider to use for the benchmark.
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
description: The metadata to use for the benchmark.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- benchmark_id
|
||||||
|
- dataset_id
|
||||||
|
- scoring_functions
|
||||||
|
title: RegisterBenchmarkRequest
|
||||||
responses:
|
responses:
|
||||||
BadRequest400:
|
BadRequest400:
|
||||||
description: The request was invalid or malformed
|
description: The request was invalid or malformed
|
||||||
|
|
|
||||||
437
docs/static/llama-stack-spec.yaml
vendored
437
docs/static/llama-stack-spec.yaml
vendored
|
|
@ -995,39 +995,6 @@ paths:
|
||||||
description: List models using the OpenAI API.
|
description: List models using the OpenAI API.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
post:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: A Model.
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/Model'
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Models
|
|
||||||
summary: Register model.
|
|
||||||
description: >-
|
|
||||||
Register model.
|
|
||||||
|
|
||||||
Register a model.
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/RegisterModelRequest'
|
|
||||||
required: true
|
|
||||||
deprecated: false
|
|
||||||
/v1/models/{model_id}:
|
/v1/models/{model_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1062,36 +1029,6 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: false
|
||||||
delete:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Models
|
|
||||||
summary: Unregister model.
|
|
||||||
description: >-
|
|
||||||
Unregister model.
|
|
||||||
|
|
||||||
Unregister a model.
|
|
||||||
parameters:
|
|
||||||
- name: model_id
|
|
||||||
in: path
|
|
||||||
description: >-
|
|
||||||
The identifier of the model to unregister.
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
deprecated: false
|
|
||||||
/v1/moderations:
|
/v1/moderations:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1722,32 +1659,6 @@ paths:
|
||||||
description: List all scoring functions.
|
description: List all scoring functions.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
post:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- ScoringFunctions
|
|
||||||
summary: Register a scoring function.
|
|
||||||
description: Register a scoring function.
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/RegisterScoringFunctionRequest'
|
|
||||||
required: true
|
|
||||||
deprecated: false
|
|
||||||
/v1/scoring-functions/{scoring_fn_id}:
|
/v1/scoring-functions/{scoring_fn_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1779,33 +1690,6 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: false
|
||||||
delete:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- ScoringFunctions
|
|
||||||
summary: Unregister a scoring function.
|
|
||||||
description: Unregister a scoring function.
|
|
||||||
parameters:
|
|
||||||
- name: scoring_fn_id
|
|
||||||
in: path
|
|
||||||
description: >-
|
|
||||||
The ID of the scoring function to unregister.
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
deprecated: false
|
|
||||||
/v1/scoring/score:
|
/v1/scoring/score:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1894,36 +1778,6 @@ paths:
|
||||||
description: List all shields.
|
description: List all shields.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
post:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: A Shield.
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/Shield'
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Shields
|
|
||||||
summary: Register a shield.
|
|
||||||
description: Register a shield.
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/RegisterShieldRequest'
|
|
||||||
required: true
|
|
||||||
deprecated: false
|
|
||||||
/v1/shields/{identifier}:
|
/v1/shields/{identifier}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1955,33 +1809,6 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: false
|
||||||
delete:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Shields
|
|
||||||
summary: Unregister a shield.
|
|
||||||
description: Unregister a shield.
|
|
||||||
parameters:
|
|
||||||
- name: identifier
|
|
||||||
in: path
|
|
||||||
description: >-
|
|
||||||
The identifier of the shield to unregister.
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
deprecated: false
|
|
||||||
/v1/tool-runtime/invoke:
|
/v1/tool-runtime/invoke:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -2077,32 +1904,6 @@ paths:
|
||||||
description: List tool groups with optional provider.
|
description: List tool groups with optional provider.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
post:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- ToolGroups
|
|
||||||
summary: Register a tool group.
|
|
||||||
description: Register a tool group.
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/RegisterToolGroupRequest'
|
|
||||||
required: true
|
|
||||||
deprecated: false
|
|
||||||
/v1/toolgroups/{toolgroup_id}:
|
/v1/toolgroups/{toolgroup_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -2134,32 +1935,6 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: false
|
||||||
delete:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- ToolGroups
|
|
||||||
summary: Unregister a tool group.
|
|
||||||
description: Unregister a tool group.
|
|
||||||
parameters:
|
|
||||||
- name: toolgroup_id
|
|
||||||
in: path
|
|
||||||
description: The ID of the tool group to unregister.
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
deprecated: false
|
|
||||||
/v1/tools:
|
/v1/tools:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -2913,11 +2688,11 @@ paths:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: >-
|
description: >-
|
||||||
A list of InterleavedContent representing the file contents.
|
A VectorStoreFileContentResponse representing the file contents.
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/VectorStoreFileContentsResponse'
|
$ref: '#/components/schemas/VectorStoreFileContentResponse'
|
||||||
'400':
|
'400':
|
||||||
$ref: '#/components/responses/BadRequest400'
|
$ref: '#/components/responses/BadRequest400'
|
||||||
'429':
|
'429':
|
||||||
|
|
@ -5564,46 +5339,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- data
|
- data
|
||||||
title: OpenAIListModelsResponse
|
title: OpenAIListModelsResponse
|
||||||
ModelType:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- llm
|
|
||||||
- embedding
|
|
||||||
- rerank
|
|
||||||
title: ModelType
|
|
||||||
description: >-
|
|
||||||
Enumeration of supported model types in Llama Stack.
|
|
||||||
RegisterModelRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
model_id:
|
|
||||||
type: string
|
|
||||||
description: The identifier of the model to register.
|
|
||||||
provider_model_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The identifier of the model in the provider.
|
|
||||||
provider_id:
|
|
||||||
type: string
|
|
||||||
description: The identifier of the provider.
|
|
||||||
metadata:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: Any additional metadata for this model.
|
|
||||||
model_type:
|
|
||||||
$ref: '#/components/schemas/ModelType'
|
|
||||||
description: The type of model to register.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- model_id
|
|
||||||
title: RegisterModelRequest
|
|
||||||
Model:
|
Model:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -5661,6 +5396,15 @@ components:
|
||||||
title: Model
|
title: Model
|
||||||
description: >-
|
description: >-
|
||||||
A model resource representing an AI model registered in Llama Stack.
|
A model resource representing an AI model registered in Llama Stack.
|
||||||
|
ModelType:
|
||||||
|
type: string
|
||||||
|
enum:
|
||||||
|
- llm
|
||||||
|
- embedding
|
||||||
|
- rerank
|
||||||
|
title: ModelType
|
||||||
|
description: >-
|
||||||
|
Enumeration of supported model types in Llama Stack.
|
||||||
RunModerationRequest:
|
RunModerationRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -8403,61 +8147,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- data
|
- data
|
||||||
title: ListScoringFunctionsResponse
|
title: ListScoringFunctionsResponse
|
||||||
ParamType:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/StringType'
|
|
||||||
- $ref: '#/components/schemas/NumberType'
|
|
||||||
- $ref: '#/components/schemas/BooleanType'
|
|
||||||
- $ref: '#/components/schemas/ArrayType'
|
|
||||||
- $ref: '#/components/schemas/ObjectType'
|
|
||||||
- $ref: '#/components/schemas/JsonType'
|
|
||||||
- $ref: '#/components/schemas/UnionType'
|
|
||||||
- $ref: '#/components/schemas/ChatCompletionInputType'
|
|
||||||
- $ref: '#/components/schemas/CompletionInputType'
|
|
||||||
discriminator:
|
|
||||||
propertyName: type
|
|
||||||
mapping:
|
|
||||||
string: '#/components/schemas/StringType'
|
|
||||||
number: '#/components/schemas/NumberType'
|
|
||||||
boolean: '#/components/schemas/BooleanType'
|
|
||||||
array: '#/components/schemas/ArrayType'
|
|
||||||
object: '#/components/schemas/ObjectType'
|
|
||||||
json: '#/components/schemas/JsonType'
|
|
||||||
union: '#/components/schemas/UnionType'
|
|
||||||
chat_completion_input: '#/components/schemas/ChatCompletionInputType'
|
|
||||||
completion_input: '#/components/schemas/CompletionInputType'
|
|
||||||
RegisterScoringFunctionRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
scoring_fn_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the scoring function to register.
|
|
||||||
description:
|
|
||||||
type: string
|
|
||||||
description: The description of the scoring function.
|
|
||||||
return_type:
|
|
||||||
$ref: '#/components/schemas/ParamType'
|
|
||||||
description: The return type of the scoring function.
|
|
||||||
provider_scoring_fn_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the provider scoring function to use for the scoring function.
|
|
||||||
provider_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the provider to use for the scoring function.
|
|
||||||
params:
|
|
||||||
$ref: '#/components/schemas/ScoringFnParams'
|
|
||||||
description: >-
|
|
||||||
The parameters for the scoring function for benchmark eval, these can
|
|
||||||
be overridden for app eval.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- scoring_fn_id
|
|
||||||
- description
|
|
||||||
- return_type
|
|
||||||
title: RegisterScoringFunctionRequest
|
|
||||||
ScoreRequest:
|
ScoreRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -8633,35 +8322,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- data
|
- data
|
||||||
title: ListShieldsResponse
|
title: ListShieldsResponse
|
||||||
RegisterShieldRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
shield_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The identifier of the shield to register.
|
|
||||||
provider_shield_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The identifier of the shield in the provider.
|
|
||||||
provider_id:
|
|
||||||
type: string
|
|
||||||
description: The identifier of the provider.
|
|
||||||
params:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: The parameters of the shield.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- shield_id
|
|
||||||
title: RegisterShieldRequest
|
|
||||||
InvokeToolRequest:
|
InvokeToolRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -8922,37 +8582,6 @@ components:
|
||||||
title: ListToolGroupsResponse
|
title: ListToolGroupsResponse
|
||||||
description: >-
|
description: >-
|
||||||
Response containing a list of tool groups.
|
Response containing a list of tool groups.
|
||||||
RegisterToolGroupRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
toolgroup_id:
|
|
||||||
type: string
|
|
||||||
description: The ID of the tool group to register.
|
|
||||||
provider_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the provider to use for the tool group.
|
|
||||||
mcp_endpoint:
|
|
||||||
$ref: '#/components/schemas/URL'
|
|
||||||
description: >-
|
|
||||||
The MCP endpoint to use for the tool group.
|
|
||||||
args:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: >-
|
|
||||||
A dictionary of arguments to pass to the tool group.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- toolgroup_id
|
|
||||||
- provider_id
|
|
||||||
title: RegisterToolGroupRequest
|
|
||||||
Chunk:
|
Chunk:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -9753,41 +9382,35 @@ components:
|
||||||
title: VectorStoreContent
|
title: VectorStoreContent
|
||||||
description: >-
|
description: >-
|
||||||
Content item from a vector store file or search result.
|
Content item from a vector store file or search result.
|
||||||
VectorStoreFileContentsResponse:
|
VectorStoreFileContentResponse:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
file_id:
|
object:
|
||||||
type: string
|
type: string
|
||||||
description: Unique identifier for the file
|
const: vector_store.file_content.page
|
||||||
filename:
|
default: vector_store.file_content.page
|
||||||
type: string
|
|
||||||
description: Name of the file
|
|
||||||
attributes:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: >-
|
description: >-
|
||||||
Key-value attributes associated with the file
|
The object type, which is always `vector_store.file_content.page`
|
||||||
content:
|
data:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/VectorStoreContent'
|
$ref: '#/components/schemas/VectorStoreContent'
|
||||||
description: List of content items from the file
|
description: Parsed content of the file
|
||||||
|
has_more:
|
||||||
|
type: boolean
|
||||||
|
description: >-
|
||||||
|
Indicates if there are more content pages to fetch
|
||||||
|
next_page:
|
||||||
|
type: string
|
||||||
|
description: The token for the next page, if any
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- file_id
|
- object
|
||||||
- filename
|
- data
|
||||||
- attributes
|
- has_more
|
||||||
- content
|
title: VectorStoreFileContentResponse
|
||||||
title: VectorStoreFileContentsResponse
|
|
||||||
description: >-
|
description: >-
|
||||||
Response from retrieving the contents of a vector store file.
|
Represents the parsed content of a vector store file.
|
||||||
OpenaiSearchVectorStoreRequest:
|
OpenaiSearchVectorStoreRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
|
||||||
651
docs/static/stainless-llama-stack-spec.yaml
vendored
651
docs/static/stainless-llama-stack-spec.yaml
vendored
|
|
@ -998,39 +998,6 @@ paths:
|
||||||
description: List models using the OpenAI API.
|
description: List models using the OpenAI API.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
post:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: A Model.
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/Model'
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Models
|
|
||||||
summary: Register model.
|
|
||||||
description: >-
|
|
||||||
Register model.
|
|
||||||
|
|
||||||
Register a model.
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/RegisterModelRequest'
|
|
||||||
required: true
|
|
||||||
deprecated: false
|
|
||||||
/v1/models/{model_id}:
|
/v1/models/{model_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1065,36 +1032,6 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: false
|
||||||
delete:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Models
|
|
||||||
summary: Unregister model.
|
|
||||||
description: >-
|
|
||||||
Unregister model.
|
|
||||||
|
|
||||||
Unregister a model.
|
|
||||||
parameters:
|
|
||||||
- name: model_id
|
|
||||||
in: path
|
|
||||||
description: >-
|
|
||||||
The identifier of the model to unregister.
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
deprecated: false
|
|
||||||
/v1/moderations:
|
/v1/moderations:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1725,32 +1662,6 @@ paths:
|
||||||
description: List all scoring functions.
|
description: List all scoring functions.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
post:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- ScoringFunctions
|
|
||||||
summary: Register a scoring function.
|
|
||||||
description: Register a scoring function.
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/RegisterScoringFunctionRequest'
|
|
||||||
required: true
|
|
||||||
deprecated: false
|
|
||||||
/v1/scoring-functions/{scoring_fn_id}:
|
/v1/scoring-functions/{scoring_fn_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1782,33 +1693,6 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: false
|
||||||
delete:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- ScoringFunctions
|
|
||||||
summary: Unregister a scoring function.
|
|
||||||
description: Unregister a scoring function.
|
|
||||||
parameters:
|
|
||||||
- name: scoring_fn_id
|
|
||||||
in: path
|
|
||||||
description: >-
|
|
||||||
The ID of the scoring function to unregister.
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
deprecated: false
|
|
||||||
/v1/scoring/score:
|
/v1/scoring/score:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1897,36 +1781,6 @@ paths:
|
||||||
description: List all shields.
|
description: List all shields.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
post:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: A Shield.
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/Shield'
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Shields
|
|
||||||
summary: Register a shield.
|
|
||||||
description: Register a shield.
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/RegisterShieldRequest'
|
|
||||||
required: true
|
|
||||||
deprecated: false
|
|
||||||
/v1/shields/{identifier}:
|
/v1/shields/{identifier}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1958,33 +1812,6 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: false
|
||||||
delete:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Shields
|
|
||||||
summary: Unregister a shield.
|
|
||||||
description: Unregister a shield.
|
|
||||||
parameters:
|
|
||||||
- name: identifier
|
|
||||||
in: path
|
|
||||||
description: >-
|
|
||||||
The identifier of the shield to unregister.
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
deprecated: false
|
|
||||||
/v1/tool-runtime/invoke:
|
/v1/tool-runtime/invoke:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -2080,32 +1907,6 @@ paths:
|
||||||
description: List tool groups with optional provider.
|
description: List tool groups with optional provider.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
post:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- ToolGroups
|
|
||||||
summary: Register a tool group.
|
|
||||||
description: Register a tool group.
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/RegisterToolGroupRequest'
|
|
||||||
required: true
|
|
||||||
deprecated: false
|
|
||||||
/v1/toolgroups/{toolgroup_id}:
|
/v1/toolgroups/{toolgroup_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -2137,32 +1938,6 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: false
|
||||||
delete:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- ToolGroups
|
|
||||||
summary: Unregister a tool group.
|
|
||||||
description: Unregister a tool group.
|
|
||||||
parameters:
|
|
||||||
- name: toolgroup_id
|
|
||||||
in: path
|
|
||||||
description: The ID of the tool group to unregister.
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
deprecated: false
|
|
||||||
/v1/tools:
|
/v1/tools:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -2916,11 +2691,11 @@ paths:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: >-
|
description: >-
|
||||||
A list of InterleavedContent representing the file contents.
|
A VectorStoreFileContentResponse representing the file contents.
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/VectorStoreFileContentsResponse'
|
$ref: '#/components/schemas/VectorStoreFileContentResponse'
|
||||||
'400':
|
'400':
|
||||||
$ref: '#/components/responses/BadRequest400'
|
$ref: '#/components/responses/BadRequest400'
|
||||||
'429':
|
'429':
|
||||||
|
|
@ -3171,7 +2946,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/RegisterDatasetRequest'
|
$ref: '#/components/schemas/RegisterDatasetRequest'
|
||||||
required: true
|
required: true
|
||||||
deprecated: false
|
deprecated: true
|
||||||
/v1beta/datasets/{dataset_id}:
|
/v1beta/datasets/{dataset_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -3228,7 +3003,7 @@ paths:
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: true
|
||||||
/v1alpha/eval/benchmarks:
|
/v1alpha/eval/benchmarks:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -3279,7 +3054,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/RegisterBenchmarkRequest'
|
$ref: '#/components/schemas/RegisterBenchmarkRequest'
|
||||||
required: true
|
required: true
|
||||||
deprecated: false
|
deprecated: true
|
||||||
/v1alpha/eval/benchmarks/{benchmark_id}:
|
/v1alpha/eval/benchmarks/{benchmark_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -3336,7 +3111,7 @@ paths:
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
deprecated: false
|
deprecated: true
|
||||||
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
|
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -6280,46 +6055,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- data
|
- data
|
||||||
title: OpenAIListModelsResponse
|
title: OpenAIListModelsResponse
|
||||||
ModelType:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- llm
|
|
||||||
- embedding
|
|
||||||
- rerank
|
|
||||||
title: ModelType
|
|
||||||
description: >-
|
|
||||||
Enumeration of supported model types in Llama Stack.
|
|
||||||
RegisterModelRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
model_id:
|
|
||||||
type: string
|
|
||||||
description: The identifier of the model to register.
|
|
||||||
provider_model_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The identifier of the model in the provider.
|
|
||||||
provider_id:
|
|
||||||
type: string
|
|
||||||
description: The identifier of the provider.
|
|
||||||
metadata:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: Any additional metadata for this model.
|
|
||||||
model_type:
|
|
||||||
$ref: '#/components/schemas/ModelType'
|
|
||||||
description: The type of model to register.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- model_id
|
|
||||||
title: RegisterModelRequest
|
|
||||||
Model:
|
Model:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -6377,6 +6112,15 @@ components:
|
||||||
title: Model
|
title: Model
|
||||||
description: >-
|
description: >-
|
||||||
A model resource representing an AI model registered in Llama Stack.
|
A model resource representing an AI model registered in Llama Stack.
|
||||||
|
ModelType:
|
||||||
|
type: string
|
||||||
|
enum:
|
||||||
|
- llm
|
||||||
|
- embedding
|
||||||
|
- rerank
|
||||||
|
title: ModelType
|
||||||
|
description: >-
|
||||||
|
Enumeration of supported model types in Llama Stack.
|
||||||
RunModerationRequest:
|
RunModerationRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -9119,61 +8863,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- data
|
- data
|
||||||
title: ListScoringFunctionsResponse
|
title: ListScoringFunctionsResponse
|
||||||
ParamType:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/StringType'
|
|
||||||
- $ref: '#/components/schemas/NumberType'
|
|
||||||
- $ref: '#/components/schemas/BooleanType'
|
|
||||||
- $ref: '#/components/schemas/ArrayType'
|
|
||||||
- $ref: '#/components/schemas/ObjectType'
|
|
||||||
- $ref: '#/components/schemas/JsonType'
|
|
||||||
- $ref: '#/components/schemas/UnionType'
|
|
||||||
- $ref: '#/components/schemas/ChatCompletionInputType'
|
|
||||||
- $ref: '#/components/schemas/CompletionInputType'
|
|
||||||
discriminator:
|
|
||||||
propertyName: type
|
|
||||||
mapping:
|
|
||||||
string: '#/components/schemas/StringType'
|
|
||||||
number: '#/components/schemas/NumberType'
|
|
||||||
boolean: '#/components/schemas/BooleanType'
|
|
||||||
array: '#/components/schemas/ArrayType'
|
|
||||||
object: '#/components/schemas/ObjectType'
|
|
||||||
json: '#/components/schemas/JsonType'
|
|
||||||
union: '#/components/schemas/UnionType'
|
|
||||||
chat_completion_input: '#/components/schemas/ChatCompletionInputType'
|
|
||||||
completion_input: '#/components/schemas/CompletionInputType'
|
|
||||||
RegisterScoringFunctionRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
scoring_fn_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the scoring function to register.
|
|
||||||
description:
|
|
||||||
type: string
|
|
||||||
description: The description of the scoring function.
|
|
||||||
return_type:
|
|
||||||
$ref: '#/components/schemas/ParamType'
|
|
||||||
description: The return type of the scoring function.
|
|
||||||
provider_scoring_fn_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the provider scoring function to use for the scoring function.
|
|
||||||
provider_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the provider to use for the scoring function.
|
|
||||||
params:
|
|
||||||
$ref: '#/components/schemas/ScoringFnParams'
|
|
||||||
description: >-
|
|
||||||
The parameters for the scoring function for benchmark eval, these can
|
|
||||||
be overridden for app eval.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- scoring_fn_id
|
|
||||||
- description
|
|
||||||
- return_type
|
|
||||||
title: RegisterScoringFunctionRequest
|
|
||||||
ScoreRequest:
|
ScoreRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -9349,35 +9038,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- data
|
- data
|
||||||
title: ListShieldsResponse
|
title: ListShieldsResponse
|
||||||
RegisterShieldRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
shield_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The identifier of the shield to register.
|
|
||||||
provider_shield_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The identifier of the shield in the provider.
|
|
||||||
provider_id:
|
|
||||||
type: string
|
|
||||||
description: The identifier of the provider.
|
|
||||||
params:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: The parameters of the shield.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- shield_id
|
|
||||||
title: RegisterShieldRequest
|
|
||||||
InvokeToolRequest:
|
InvokeToolRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -9638,37 +9298,6 @@ components:
|
||||||
title: ListToolGroupsResponse
|
title: ListToolGroupsResponse
|
||||||
description: >-
|
description: >-
|
||||||
Response containing a list of tool groups.
|
Response containing a list of tool groups.
|
||||||
RegisterToolGroupRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
toolgroup_id:
|
|
||||||
type: string
|
|
||||||
description: The ID of the tool group to register.
|
|
||||||
provider_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the provider to use for the tool group.
|
|
||||||
mcp_endpoint:
|
|
||||||
$ref: '#/components/schemas/URL'
|
|
||||||
description: >-
|
|
||||||
The MCP endpoint to use for the tool group.
|
|
||||||
args:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: >-
|
|
||||||
A dictionary of arguments to pass to the tool group.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- toolgroup_id
|
|
||||||
- provider_id
|
|
||||||
title: RegisterToolGroupRequest
|
|
||||||
Chunk:
|
Chunk:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -10469,41 +10098,35 @@ components:
|
||||||
title: VectorStoreContent
|
title: VectorStoreContent
|
||||||
description: >-
|
description: >-
|
||||||
Content item from a vector store file or search result.
|
Content item from a vector store file or search result.
|
||||||
VectorStoreFileContentsResponse:
|
VectorStoreFileContentResponse:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
file_id:
|
object:
|
||||||
type: string
|
type: string
|
||||||
description: Unique identifier for the file
|
const: vector_store.file_content.page
|
||||||
filename:
|
default: vector_store.file_content.page
|
||||||
type: string
|
|
||||||
description: Name of the file
|
|
||||||
attributes:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: >-
|
description: >-
|
||||||
Key-value attributes associated with the file
|
The object type, which is always `vector_store.file_content.page`
|
||||||
content:
|
data:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/VectorStoreContent'
|
$ref: '#/components/schemas/VectorStoreContent'
|
||||||
description: List of content items from the file
|
description: Parsed content of the file
|
||||||
|
has_more:
|
||||||
|
type: boolean
|
||||||
|
description: >-
|
||||||
|
Indicates if there are more content pages to fetch
|
||||||
|
next_page:
|
||||||
|
type: string
|
||||||
|
description: The token for the next page, if any
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- file_id
|
- object
|
||||||
- filename
|
- data
|
||||||
- attributes
|
- has_more
|
||||||
- content
|
title: VectorStoreFileContentResponse
|
||||||
title: VectorStoreFileContentsResponse
|
|
||||||
description: >-
|
description: >-
|
||||||
Response from retrieving the contents of a vector store file.
|
Represents the parsed content of a vector store file.
|
||||||
OpenaiSearchVectorStoreRequest:
|
OpenaiSearchVectorStoreRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -10820,68 +10443,6 @@ components:
|
||||||
- data
|
- data
|
||||||
title: ListDatasetsResponse
|
title: ListDatasetsResponse
|
||||||
description: Response from listing datasets.
|
description: Response from listing datasets.
|
||||||
DataSource:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/URIDataSource'
|
|
||||||
- $ref: '#/components/schemas/RowsDataSource'
|
|
||||||
discriminator:
|
|
||||||
propertyName: type
|
|
||||||
mapping:
|
|
||||||
uri: '#/components/schemas/URIDataSource'
|
|
||||||
rows: '#/components/schemas/RowsDataSource'
|
|
||||||
RegisterDatasetRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
purpose:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- post-training/messages
|
|
||||||
- eval/question-answer
|
|
||||||
- eval/messages-answer
|
|
||||||
description: >-
|
|
||||||
The purpose of the dataset. One of: - "post-training/messages": The dataset
|
|
||||||
contains a messages column with list of messages for post-training. {
|
|
||||||
"messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
|
|
||||||
"content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
|
|
||||||
contains a question column and an answer column for evaluation. { "question":
|
|
||||||
"What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
|
|
||||||
The dataset contains a messages column with list of messages and an answer
|
|
||||||
column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
|
|
||||||
my name is John Doe."}, {"role": "assistant", "content": "Hello, John
|
|
||||||
Doe. How can I help you today?"}, {"role": "user", "content": "What's
|
|
||||||
my name?"}, ], "answer": "John Doe" }
|
|
||||||
source:
|
|
||||||
$ref: '#/components/schemas/DataSource'
|
|
||||||
description: >-
|
|
||||||
The data source of the dataset. Ensure that the data source schema is
|
|
||||||
compatible with the purpose of the dataset. Examples: - { "type": "uri",
|
|
||||||
"uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
|
|
||||||
"lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
|
|
||||||
} - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
|
|
||||||
} - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
|
|
||||||
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
|
|
||||||
} ] }
|
|
||||||
metadata:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: >-
|
|
||||||
The metadata for the dataset. - E.g. {"description": "My dataset"}.
|
|
||||||
dataset_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the dataset. If not provided, an ID will be generated.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- purpose
|
|
||||||
- source
|
|
||||||
title: RegisterDatasetRequest
|
|
||||||
Benchmark:
|
Benchmark:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -10949,47 +10510,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- data
|
- data
|
||||||
title: ListBenchmarksResponse
|
title: ListBenchmarksResponse
|
||||||
RegisterBenchmarkRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
benchmark_id:
|
|
||||||
type: string
|
|
||||||
description: The ID of the benchmark to register.
|
|
||||||
dataset_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the dataset to use for the benchmark.
|
|
||||||
scoring_functions:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The scoring functions to use for the benchmark.
|
|
||||||
provider_benchmark_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the provider benchmark to use for the benchmark.
|
|
||||||
provider_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The ID of the provider to use for the benchmark.
|
|
||||||
metadata:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: The metadata to use for the benchmark.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- benchmark_id
|
|
||||||
- dataset_id
|
|
||||||
- scoring_functions
|
|
||||||
title: RegisterBenchmarkRequest
|
|
||||||
BenchmarkConfig:
|
BenchmarkConfig:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -11851,6 +11371,109 @@ components:
|
||||||
- hyperparam_search_config
|
- hyperparam_search_config
|
||||||
- logger_config
|
- logger_config
|
||||||
title: SupervisedFineTuneRequest
|
title: SupervisedFineTuneRequest
|
||||||
|
DataSource:
|
||||||
|
oneOf:
|
||||||
|
- $ref: '#/components/schemas/URIDataSource'
|
||||||
|
- $ref: '#/components/schemas/RowsDataSource'
|
||||||
|
discriminator:
|
||||||
|
propertyName: type
|
||||||
|
mapping:
|
||||||
|
uri: '#/components/schemas/URIDataSource'
|
||||||
|
rows: '#/components/schemas/RowsDataSource'
|
||||||
|
RegisterDatasetRequest:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
purpose:
|
||||||
|
type: string
|
||||||
|
enum:
|
||||||
|
- post-training/messages
|
||||||
|
- eval/question-answer
|
||||||
|
- eval/messages-answer
|
||||||
|
description: >-
|
||||||
|
The purpose of the dataset. One of: - "post-training/messages": The dataset
|
||||||
|
contains a messages column with list of messages for post-training. {
|
||||||
|
"messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
|
||||||
|
"content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
|
||||||
|
contains a question column and an answer column for evaluation. { "question":
|
||||||
|
"What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
|
||||||
|
The dataset contains a messages column with list of messages and an answer
|
||||||
|
column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
|
||||||
|
my name is John Doe."}, {"role": "assistant", "content": "Hello, John
|
||||||
|
Doe. How can I help you today?"}, {"role": "user", "content": "What's
|
||||||
|
my name?"}, ], "answer": "John Doe" }
|
||||||
|
source:
|
||||||
|
$ref: '#/components/schemas/DataSource'
|
||||||
|
description: >-
|
||||||
|
The data source of the dataset. Ensure that the data source schema is
|
||||||
|
compatible with the purpose of the dataset. Examples: - { "type": "uri",
|
||||||
|
"uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
|
||||||
|
"lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
|
||||||
|
} - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
|
||||||
|
} - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
|
||||||
|
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
|
||||||
|
} ] }
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
description: >-
|
||||||
|
The metadata for the dataset. - E.g. {"description": "My dataset"}.
|
||||||
|
dataset_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ID of the dataset. If not provided, an ID will be generated.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- purpose
|
||||||
|
- source
|
||||||
|
title: RegisterDatasetRequest
|
||||||
|
RegisterBenchmarkRequest:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
benchmark_id:
|
||||||
|
type: string
|
||||||
|
description: The ID of the benchmark to register.
|
||||||
|
dataset_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ID of the dataset to use for the benchmark.
|
||||||
|
scoring_functions:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The scoring functions to use for the benchmark.
|
||||||
|
provider_benchmark_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ID of the provider benchmark to use for the benchmark.
|
||||||
|
provider_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ID of the provider to use for the benchmark.
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
description: The metadata to use for the benchmark.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- benchmark_id
|
||||||
|
- dataset_id
|
||||||
|
- scoring_functions
|
||||||
|
title: RegisterBenchmarkRequest
|
||||||
responses:
|
responses:
|
||||||
BadRequest400:
|
BadRequest400:
|
||||||
description: The request was invalid or malformed
|
description: The request was invalid or malformed
|
||||||
|
|
|
||||||
|
|
@ -298,6 +298,7 @@ exclude = [
|
||||||
"^src/llama_stack/providers/remote/agents/sample/",
|
"^src/llama_stack/providers/remote/agents/sample/",
|
||||||
"^src/llama_stack/providers/remote/datasetio/huggingface/",
|
"^src/llama_stack/providers/remote/datasetio/huggingface/",
|
||||||
"^src/llama_stack/providers/remote/datasetio/nvidia/",
|
"^src/llama_stack/providers/remote/datasetio/nvidia/",
|
||||||
|
"^src/llama_stack/providers/remote/inference/oci/",
|
||||||
"^src/llama_stack/providers/remote/inference/bedrock/",
|
"^src/llama_stack/providers/remote/inference/bedrock/",
|
||||||
"^src/llama_stack/providers/remote/inference/nvidia/",
|
"^src/llama_stack/providers/remote/inference/nvidia/",
|
||||||
"^src/llama_stack/providers/remote/inference/passthrough/",
|
"^src/llama_stack/providers/remote/inference/passthrough/",
|
||||||
|
|
|
||||||
|
|
@ -74,7 +74,7 @@ class Benchmarks(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
|
||||||
async def register_benchmark(
|
async def register_benchmark(
|
||||||
self,
|
self,
|
||||||
benchmark_id: str,
|
benchmark_id: str,
|
||||||
|
|
@ -95,7 +95,7 @@ class Benchmarks(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
|
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
|
||||||
async def unregister_benchmark(self, benchmark_id: str) -> None:
|
async def unregister_benchmark(self, benchmark_id: str) -> None:
|
||||||
"""Unregister a benchmark.
|
"""Unregister a benchmark.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -146,7 +146,7 @@ class ListDatasetsResponse(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
class Datasets(Protocol):
|
class Datasets(Protocol):
|
||||||
@webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA)
|
@webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA, deprecated=True)
|
||||||
async def register_dataset(
|
async def register_dataset(
|
||||||
self,
|
self,
|
||||||
purpose: DatasetPurpose,
|
purpose: DatasetPurpose,
|
||||||
|
|
@ -235,7 +235,7 @@ class Datasets(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA)
|
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA, deprecated=True)
|
||||||
async def unregister_dataset(
|
async def unregister_dataset(
|
||||||
self,
|
self,
|
||||||
dataset_id: str,
|
dataset_id: str,
|
||||||
|
|
|
||||||
|
|
@ -136,7 +136,7 @@ class Models(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||||
async def register_model(
|
async def register_model(
|
||||||
self,
|
self,
|
||||||
model_id: str,
|
model_id: str,
|
||||||
|
|
@ -158,7 +158,7 @@ class Models(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||||
async def unregister_model(
|
async def unregister_model(
|
||||||
self,
|
self,
|
||||||
model_id: str,
|
model_id: str,
|
||||||
|
|
|
||||||
|
|
@ -178,7 +178,7 @@ class ScoringFunctions(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||||
async def register_scoring_function(
|
async def register_scoring_function(
|
||||||
self,
|
self,
|
||||||
scoring_fn_id: str,
|
scoring_fn_id: str,
|
||||||
|
|
@ -199,7 +199,9 @@ class ScoringFunctions(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
|
@webmethod(
|
||||||
|
route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
|
||||||
|
)
|
||||||
async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
|
async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
|
||||||
"""Unregister a scoring function.
|
"""Unregister a scoring function.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -67,7 +67,7 @@ class Shields(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||||
async def register_shield(
|
async def register_shield(
|
||||||
self,
|
self,
|
||||||
shield_id: str,
|
shield_id: str,
|
||||||
|
|
@ -85,7 +85,7 @@ class Shields(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||||
async def unregister_shield(self, identifier: str) -> None:
|
async def unregister_shield(self, identifier: str) -> None:
|
||||||
"""Unregister a shield.
|
"""Unregister a shield.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -109,7 +109,7 @@ class ListToolDefsResponse(BaseModel):
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
@telemetry_traceable
|
@telemetry_traceable
|
||||||
class ToolGroups(Protocol):
|
class ToolGroups(Protocol):
|
||||||
@webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||||
async def register_tool_group(
|
async def register_tool_group(
|
||||||
self,
|
self,
|
||||||
toolgroup_id: str,
|
toolgroup_id: str,
|
||||||
|
|
@ -167,7 +167,7 @@ class ToolGroups(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||||
async def unregister_toolgroup(
|
async def unregister_toolgroup(
|
||||||
self,
|
self,
|
||||||
toolgroup_id: str,
|
toolgroup_id: str,
|
||||||
|
|
|
||||||
|
|
@ -396,19 +396,19 @@ class VectorStoreListFilesResponse(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class VectorStoreFileContentsResponse(BaseModel):
|
class VectorStoreFileContentResponse(BaseModel):
|
||||||
"""Response from retrieving the contents of a vector store file.
|
"""Represents the parsed content of a vector store file.
|
||||||
|
|
||||||
:param file_id: Unique identifier for the file
|
:param object: The object type, which is always `vector_store.file_content.page`
|
||||||
:param filename: Name of the file
|
:param data: Parsed content of the file
|
||||||
:param attributes: Key-value attributes associated with the file
|
:param has_more: Indicates if there are more content pages to fetch
|
||||||
:param content: List of content items from the file
|
:param next_page: The token for the next page, if any
|
||||||
"""
|
"""
|
||||||
|
|
||||||
file_id: str
|
object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
|
||||||
filename: str
|
data: list[VectorStoreContent]
|
||||||
attributes: dict[str, Any]
|
has_more: bool
|
||||||
content: list[VectorStoreContent]
|
next_page: str | None = None
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
|
|
@ -732,12 +732,12 @@ class VectorIO(Protocol):
|
||||||
self,
|
self,
|
||||||
vector_store_id: str,
|
vector_store_id: str,
|
||||||
file_id: str,
|
file_id: str,
|
||||||
) -> VectorStoreFileContentsResponse:
|
) -> VectorStoreFileContentResponse:
|
||||||
"""Retrieves the contents of a vector store file.
|
"""Retrieves the contents of a vector store file.
|
||||||
|
|
||||||
:param vector_store_id: The ID of the vector store containing the file to retrieve.
|
:param vector_store_id: The ID of the vector store containing the file to retrieve.
|
||||||
:param file_id: The ID of the file to retrieve.
|
:param file_id: The ID of the file to retrieve.
|
||||||
:returns: A list of InterleavedContent representing the file contents.
|
:returns: A VectorStoreFileContentResponse representing the file contents.
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ from llama_stack.apis.vector_io import (
|
||||||
VectorStoreChunkingStrategyStaticConfig,
|
VectorStoreChunkingStrategyStaticConfig,
|
||||||
VectorStoreDeleteResponse,
|
VectorStoreDeleteResponse,
|
||||||
VectorStoreFileBatchObject,
|
VectorStoreFileBatchObject,
|
||||||
VectorStoreFileContentsResponse,
|
VectorStoreFileContentResponse,
|
||||||
VectorStoreFileDeleteResponse,
|
VectorStoreFileDeleteResponse,
|
||||||
VectorStoreFileObject,
|
VectorStoreFileObject,
|
||||||
VectorStoreFilesListInBatchResponse,
|
VectorStoreFilesListInBatchResponse,
|
||||||
|
|
@ -338,7 +338,7 @@ class VectorIORouter(VectorIO):
|
||||||
self,
|
self,
|
||||||
vector_store_id: str,
|
vector_store_id: str,
|
||||||
file_id: str,
|
file_id: str,
|
||||||
) -> VectorStoreFileContentsResponse:
|
) -> VectorStoreFileContentResponse:
|
||||||
logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
|
logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
|
||||||
provider = await self.routing_table.get_provider_impl(vector_store_id)
|
provider = await self.routing_table.get_provider_impl(vector_store_id)
|
||||||
return await provider.openai_retrieve_vector_store_file_contents(
|
return await provider.openai_retrieve_vector_store_file_contents(
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ from llama_stack.apis.vector_io.vector_io import (
|
||||||
SearchRankingOptions,
|
SearchRankingOptions,
|
||||||
VectorStoreChunkingStrategy,
|
VectorStoreChunkingStrategy,
|
||||||
VectorStoreDeleteResponse,
|
VectorStoreDeleteResponse,
|
||||||
VectorStoreFileContentsResponse,
|
VectorStoreFileContentResponse,
|
||||||
VectorStoreFileDeleteResponse,
|
VectorStoreFileDeleteResponse,
|
||||||
VectorStoreFileObject,
|
VectorStoreFileObject,
|
||||||
VectorStoreFileStatus,
|
VectorStoreFileStatus,
|
||||||
|
|
@ -195,7 +195,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
|
||||||
self,
|
self,
|
||||||
vector_store_id: str,
|
vector_store_id: str,
|
||||||
file_id: str,
|
file_id: str,
|
||||||
) -> VectorStoreFileContentsResponse:
|
) -> VectorStoreFileContentResponse:
|
||||||
await self.assert_action_allowed("read", "vector_store", vector_store_id)
|
await self.assert_action_allowed("read", "vector_store", vector_store_id)
|
||||||
provider = await self.get_provider_impl(vector_store_id)
|
provider = await self.get_provider_impl(vector_store_id)
|
||||||
return await provider.openai_retrieve_vector_store_file_contents(
|
return await provider.openai_retrieve_vector_store_file_contents(
|
||||||
|
|
|
||||||
7
src/llama_stack/distributions/oci/__init__.py
Normal file
7
src/llama_stack/distributions/oci/__init__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
from .oci import get_distribution_template # noqa: F401
|
||||||
35
src/llama_stack/distributions/oci/build.yaml
Normal file
35
src/llama_stack/distributions/oci/build.yaml
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
version: 2
|
||||||
|
distribution_spec:
|
||||||
|
description: Use Oracle Cloud Infrastructure (OCI) Generative AI for running LLM
|
||||||
|
inference with scalable cloud services
|
||||||
|
providers:
|
||||||
|
inference:
|
||||||
|
- provider_type: remote::oci
|
||||||
|
vector_io:
|
||||||
|
- provider_type: inline::faiss
|
||||||
|
- provider_type: remote::chromadb
|
||||||
|
- provider_type: remote::pgvector
|
||||||
|
safety:
|
||||||
|
- provider_type: inline::llama-guard
|
||||||
|
agents:
|
||||||
|
- provider_type: inline::meta-reference
|
||||||
|
eval:
|
||||||
|
- provider_type: inline::meta-reference
|
||||||
|
datasetio:
|
||||||
|
- provider_type: remote::huggingface
|
||||||
|
- provider_type: inline::localfs
|
||||||
|
scoring:
|
||||||
|
- provider_type: inline::basic
|
||||||
|
- provider_type: inline::llm-as-judge
|
||||||
|
- provider_type: inline::braintrust
|
||||||
|
tool_runtime:
|
||||||
|
- provider_type: remote::brave-search
|
||||||
|
- provider_type: remote::tavily-search
|
||||||
|
- provider_type: inline::rag-runtime
|
||||||
|
- provider_type: remote::model-context-protocol
|
||||||
|
files:
|
||||||
|
- provider_type: inline::localfs
|
||||||
|
image_type: venv
|
||||||
|
additional_pip_packages:
|
||||||
|
- aiosqlite
|
||||||
|
- sqlalchemy[asyncio]
|
||||||
140
src/llama_stack/distributions/oci/doc_template.md
Normal file
140
src/llama_stack/distributions/oci/doc_template.md
Normal file
|
|
@ -0,0 +1,140 @@
|
||||||
|
---
|
||||||
|
orphan: true
|
||||||
|
---
|
||||||
|
# OCI Distribution
|
||||||
|
|
||||||
|
The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
|
||||||
|
|
||||||
|
{{ providers_table }}
|
||||||
|
|
||||||
|
{% if run_config_env_vars %}
|
||||||
|
### Environment Variables
|
||||||
|
|
||||||
|
The following environment variables can be configured:
|
||||||
|
|
||||||
|
{% for var, (default_value, description) in run_config_env_vars.items() %}
|
||||||
|
- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if default_models %}
|
||||||
|
### Models
|
||||||
|
|
||||||
|
The following models are available by default:
|
||||||
|
|
||||||
|
{% for model in default_models %}
|
||||||
|
- `{{ model.model_id }} {{ model.doc_string }}`
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
### Oracle Cloud Infrastructure Setup
|
||||||
|
|
||||||
|
Before using the OCI Generative AI distribution, ensure you have:
|
||||||
|
|
||||||
|
1. **Oracle Cloud Infrastructure Account**: Sign up at [Oracle Cloud Infrastructure](https://cloud.oracle.com/)
|
||||||
|
2. **Generative AI Service Access**: Enable the Generative AI service in your OCI tenancy
|
||||||
|
3. **Compartment**: Create or identify a compartment where you'll deploy Generative AI models
|
||||||
|
4. **Authentication**: Configure authentication using either:
|
||||||
|
- **Instance Principal** (recommended for cloud-hosted deployments)
|
||||||
|
- **API Key** (for on-premises or development environments)
|
||||||
|
|
||||||
|
### Authentication Methods
|
||||||
|
|
||||||
|
#### Instance Principal Authentication (Recommended)
|
||||||
|
Instance Principal authentication allows OCI resources to authenticate using the identity of the compute instance they're running on. This is the most secure method for production deployments.
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Instance must be running in an Oracle Cloud Infrastructure compartment
|
||||||
|
- Instance must have appropriate IAM policies to access Generative AI services
|
||||||
|
|
||||||
|
#### API Key Authentication
|
||||||
|
For development or on-premises deployments, follow [this doc](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/apisigningkey.htm) to learn how to create your API signing key for your config file.
|
||||||
|
|
||||||
|
### Required IAM Policies
|
||||||
|
|
||||||
|
Ensure your OCI user or instance has the following policy statements:
|
||||||
|
|
||||||
|
```
|
||||||
|
Allow group <group_name> to use generative-ai-inference-endpoints in compartment <compartment_name>
|
||||||
|
Allow group <group_name> to manage generative-ai-inference-endpoints in compartment <compartment_name>
|
||||||
|
```
|
||||||
|
|
||||||
|
## Supported Services
|
||||||
|
|
||||||
|
### Inference: OCI Generative AI
|
||||||
|
Oracle Cloud Infrastructure Generative AI provides access to high-performance AI models through OCI's Platform-as-a-Service offering. The service supports:
|
||||||
|
|
||||||
|
- **Chat Completions**: Conversational AI with context awareness
|
||||||
|
- **Text Generation**: Complete prompts and generate text content
|
||||||
|
|
||||||
|
#### Available Models
|
||||||
|
OCI Generative AI provides access to models from Meta, Cohere, OpenAI, xAI (Grok), and other providers.
|
||||||
|
|
||||||
|
### Safety: Llama Guard
|
||||||
|
For content safety and moderation, this distribution uses Meta's LlamaGuard model through the OCI Generative AI service to provide:
|
||||||
|
- Content filtering and moderation
|
||||||
|
- Policy compliance checking
|
||||||
|
- Harmful content detection
|
||||||
|
|
||||||
|
### Vector Storage: Multiple Options
|
||||||
|
The distribution supports several vector storage providers:
|
||||||
|
- **FAISS**: Local in-memory vector search
|
||||||
|
- **ChromaDB**: Distributed vector database
|
||||||
|
- **PGVector**: PostgreSQL with vector extensions
|
||||||
|
|
||||||
|
### Additional Services
|
||||||
|
- **Dataset I/O**: Local filesystem and Hugging Face integration
|
||||||
|
- **Tool Runtime**: Web search (Brave, Tavily) and RAG capabilities
|
||||||
|
- **Evaluation**: Meta reference evaluation framework
|
||||||
|
|
||||||
|
## Running Llama Stack with OCI
|
||||||
|
|
||||||
|
You can run the OCI distribution via Docker or local virtual environment.
|
||||||
|
|
||||||
|
### Via venv
|
||||||
|
|
||||||
|
If you've set up your local development environment, you can run the distribution directly from your local virtual environment.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
OCI_AUTH_TYPE=$OCI_AUTH_TYPE OCI_REGION=$OCI_REGION OCI_COMPARTMENT_OCID=$OCI_COMPARTMENT_OCID llama stack run --port 8321 oci
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configuration Examples
|
||||||
|
|
||||||
|
#### Using Instance Principal (Recommended for Production)
|
||||||
|
```bash
|
||||||
|
export OCI_AUTH_TYPE=instance_principal
|
||||||
|
export OCI_REGION=us-chicago-1
|
||||||
|
export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Using API Key Authentication (Development)
|
||||||
|
```bash
|
||||||
|
export OCI_AUTH_TYPE=config_file
|
||||||
|
export OCI_CONFIG_FILE_PATH=~/.oci/config
|
||||||
|
export OCI_CLI_PROFILE=DEFAULT
|
||||||
|
export OCI_REGION=us-chicago-1
|
||||||
|
export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
|
||||||
|
```
|
||||||
|
|
||||||
|
## Regional Endpoints
|
||||||
|
|
||||||
|
OCI Generative AI is available in multiple regions. The service automatically routes to the appropriate regional endpoint based on your configuration. For a full list of regional model availability, visit:
|
||||||
|
|
||||||
|
https://docs.oracle.com/en-us/iaas/Content/generative-ai/overview.htm#regions
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Common Issues
|
||||||
|
|
||||||
|
1. **Authentication Errors**: Verify your OCI credentials and IAM policies
|
||||||
|
2. **Model Not Found**: Ensure the model OCID is correct and the model is available in your region
|
||||||
|
3. **Permission Denied**: Check compartment permissions and Generative AI service access
|
||||||
|
4. **Region Unavailable**: Verify the specified region supports Generative AI services
|
||||||
|
|
||||||
|
### Getting Help
|
||||||
|
|
||||||
|
For additional support:
|
||||||
|
- [OCI Generative AI Documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm)
|
||||||
|
- [Llama Stack Issues](https://github.com/meta-llama/llama-stack/issues)
|
||||||
108
src/llama_stack/distributions/oci/oci.py
Normal file
108
src/llama_stack/distributions/oci/oci.py
Normal file
|
|
@ -0,0 +1,108 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput
|
||||||
|
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
|
||||||
|
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
|
||||||
|
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
|
||||||
|
from llama_stack.providers.remote.inference.oci.config import OCIConfig
|
||||||
|
|
||||||
|
|
||||||
|
def get_distribution_template(name: str = "oci") -> DistributionTemplate:
    """Assemble the distribution template for the OCI Generative AI distribution.

    Args:
        name: Distribution name; it is also used to derive the
            ``~/.llama/distributions/<name>`` directory handed to the FAISS and
            local-files sample run configs.

    Returns:
        A ``DistributionTemplate`` with the build-time provider list, a single
        ``run.yaml`` run config, and the documented environment variables.
    """
    # Provider types offered per API at build time (dict insertion order is kept).
    provider_types_by_api = {
        "inference": ["remote::oci"],
        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
        "safety": ["inline::llama-guard"],
        "agents": ["inline::meta-reference"],
        "eval": ["inline::meta-reference"],
        "datasetio": ["remote::huggingface", "inline::localfs"],
        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
        "files": ["inline::localfs"],
    }
    providers = {
        api: [BuildProvider(provider_type=provider_type) for provider_type in provider_types]
        for api, provider_types in provider_types_by_api.items()
    }

    # Directory passed to the sample run configs of the stateful inline providers.
    distro_dir = f"~/.llama/distributions/{name}"

    # Explicit provider instances that override the generated defaults in run.yaml.
    run_provider_overrides = {
        "inference": [
            Provider(
                provider_id="oci",
                provider_type="remote::oci",
                config=OCIConfig.sample_run_config(),
            )
        ],
        "vector_io": [
            Provider(
                provider_id="faiss",
                provider_type="inline::faiss",
                config=FaissVectorIOConfig.sample_run_config(distro_dir),
            )
        ],
        "files": [
            Provider(
                provider_id="meta-reference-files",
                provider_type="inline::localfs",
                config=LocalfsFilesImplConfig.sample_run_config(distro_dir),
            )
        ],
    }

    websearch_tool_group = ToolGroupInput(
        toolgroup_id="builtin::websearch",
        provider_id="tavily-search",
    )
    run_settings = RunConfigSettings(
        provider_overrides=run_provider_overrides,
        default_tool_groups=[websearch_tool_group],
    )

    # Each entry maps an environment variable to (default value, description).
    # NOTE(review): OCI_REGION is listed with an empty default here, while the
    # provider's sample run config falls back to us-ashburn-1 — confirm which
    # value should be documented.
    env_vars = {
        "OCI_AUTH_TYPE": (
            "instance_principal",
            "OCI authentication type (instance_principal or config_file)",
        ),
        "OCI_REGION": (
            "",
            "OCI region (e.g., us-ashburn-1, us-chicago-1, us-phoenix-1, eu-frankfurt-1)",
        ),
        "OCI_COMPARTMENT_OCID": (
            "",
            "OCI compartment ID for the Generative AI service",
        ),
        "OCI_CONFIG_FILE_PATH": (
            "~/.oci/config",
            "OCI config file path (required if OCI_AUTH_TYPE is config_file)",
        ),
        "OCI_CLI_PROFILE": (
            "DEFAULT",
            "OCI CLI profile name to use from config file",
        ),
    }

    return DistributionTemplate(
        name=name,
        distro_type="remote_hosted",
        description="Use Oracle Cloud Infrastructure (OCI) Generative AI for running LLM inference with scalable cloud services",
        container_image=None,
        template_path=Path(__file__).with_name("doc_template.md"),
        providers=providers,
        run_configs={"run.yaml": run_settings},
        run_config_env_vars=env_vars,
    )
|
||||||
136
src/llama_stack/distributions/oci/run.yaml
Normal file
136
src/llama_stack/distributions/oci/run.yaml
Normal file
|
|
@ -0,0 +1,136 @@
|
||||||
|
version: 2
|
||||||
|
image_name: oci
|
||||||
|
apis:
|
||||||
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
|
- files
|
||||||
|
- inference
|
||||||
|
- safety
|
||||||
|
- scoring
|
||||||
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
|
providers:
|
||||||
|
inference:
|
||||||
|
- provider_id: oci
|
||||||
|
provider_type: remote::oci
|
||||||
|
config:
|
||||||
|
oci_auth_type: ${env.OCI_AUTH_TYPE:=instance_principal}
|
||||||
|
oci_config_file_path: ${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}
|
||||||
|
oci_config_profile: ${env.OCI_CLI_PROFILE:=DEFAULT}
|
||||||
|
oci_region: ${env.OCI_REGION:=us-ashburn-1}
|
||||||
|
oci_compartment_id: ${env.OCI_COMPARTMENT_OCID:=}
|
||||||
|
vector_io:
|
||||||
|
- provider_id: faiss
|
||||||
|
provider_type: inline::faiss
|
||||||
|
config:
|
||||||
|
persistence:
|
||||||
|
namespace: vector_io::faiss
|
||||||
|
backend: kv_default
|
||||||
|
safety:
|
||||||
|
- provider_id: llama-guard
|
||||||
|
provider_type: inline::llama-guard
|
||||||
|
config:
|
||||||
|
excluded_categories: []
|
||||||
|
agents:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config:
|
||||||
|
persistence:
|
||||||
|
agent_state:
|
||||||
|
namespace: agents
|
||||||
|
backend: kv_default
|
||||||
|
responses:
|
||||||
|
table_name: responses
|
||||||
|
backend: sql_default
|
||||||
|
max_write_queue_size: 10000
|
||||||
|
num_writers: 4
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config:
|
||||||
|
kvstore:
|
||||||
|
namespace: eval
|
||||||
|
backend: kv_default
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config:
|
||||||
|
kvstore:
|
||||||
|
namespace: datasetio::huggingface
|
||||||
|
backend: kv_default
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config:
|
||||||
|
kvstore:
|
||||||
|
namespace: datasetio::localfs
|
||||||
|
backend: kv_default
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config:
|
||||||
|
openai_api_key: ${env.OPENAI_API_KEY:=}
|
||||||
|
tool_runtime:
|
||||||
|
- provider_id: brave-search
|
||||||
|
provider_type: remote::brave-search
|
||||||
|
config:
|
||||||
|
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
|
||||||
|
max_results: 3
|
||||||
|
- provider_id: tavily-search
|
||||||
|
provider_type: remote::tavily-search
|
||||||
|
config:
|
||||||
|
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
|
||||||
|
max_results: 3
|
||||||
|
- provider_id: rag-runtime
|
||||||
|
provider_type: inline::rag-runtime
|
||||||
|
- provider_id: model-context-protocol
|
||||||
|
provider_type: remote::model-context-protocol
|
||||||
|
files:
|
||||||
|
- provider_id: meta-reference-files
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config:
|
||||||
|
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/oci/files}
|
||||||
|
metadata_store:
|
||||||
|
table_name: files_metadata
|
||||||
|
backend: sql_default
|
||||||
|
storage:
|
||||||
|
backends:
|
||||||
|
kv_default:
|
||||||
|
type: kv_sqlite
|
||||||
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/kvstore.db
|
||||||
|
sql_default:
|
||||||
|
type: sql_sqlite
|
||||||
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/sql_store.db
|
||||||
|
stores:
|
||||||
|
metadata:
|
||||||
|
namespace: registry
|
||||||
|
backend: kv_default
|
||||||
|
inference:
|
||||||
|
table_name: inference_store
|
||||||
|
backend: sql_default
|
||||||
|
max_write_queue_size: 10000
|
||||||
|
num_writers: 4
|
||||||
|
conversations:
|
||||||
|
table_name: openai_conversations
|
||||||
|
backend: sql_default
|
||||||
|
prompts:
|
||||||
|
namespace: prompts
|
||||||
|
backend: kv_default
|
||||||
|
registered_resources:
|
||||||
|
models: []
|
||||||
|
shields: []
|
||||||
|
vector_dbs: []
|
||||||
|
datasets: []
|
||||||
|
scoring_fns: []
|
||||||
|
benchmarks: []
|
||||||
|
tool_groups:
|
||||||
|
- toolgroup_id: builtin::websearch
|
||||||
|
provider_id: tavily-search
|
||||||
|
server:
|
||||||
|
port: 8321
|
||||||
|
telemetry:
|
||||||
|
enabled: true
|
||||||
|
|
@ -223,7 +223,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
|
||||||
return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
|
return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
|
||||||
|
|
||||||
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
||||||
assert self.kvstore is not None
|
if self.kvstore is None:
|
||||||
|
raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
|
||||||
|
|
||||||
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
|
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
|
||||||
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
|
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
|
||||||
|
|
@ -239,7 +240,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
|
||||||
return [i.vector_store for i in self.cache.values()]
|
return [i.vector_store for i in self.cache.values()]
|
||||||
|
|
||||||
async def unregister_vector_store(self, vector_store_id: str) -> None:
|
async def unregister_vector_store(self, vector_store_id: str) -> None:
|
||||||
assert self.kvstore is not None
|
if self.kvstore is None:
|
||||||
|
raise RuntimeError("KVStore not initialized. Call initialize() before unregistering vector stores.")
|
||||||
|
|
||||||
if vector_store_id not in self.cache:
|
if vector_store_id not in self.cache:
|
||||||
return
|
return
|
||||||
|
|
@ -248,6 +250,27 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
|
||||||
del self.cache[vector_store_id]
|
del self.cache[vector_store_id]
|
||||||
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
|
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
|
||||||
|
|
||||||
|
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
    """Return the index for ``vector_store_id``, loading it from the KV store on a cache miss.

    Args:
        vector_store_id: Identifier of the vector store to look up.

    Returns:
        The cached or freshly built ``VectorStoreWithIndex``. As written, this
        method never returns ``None``; a missing store raises instead.

    Raises:
        RuntimeError: If the KV store is not initialized.
        VectorStoreNotFoundError: If nothing is stored for this identifier.
    """
    # Fast path: the index has already been materialized in this process.
    try:
        return self.cache[vector_store_id]
    except KeyError:
        pass

    if self.kvstore is None:
        raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")

    # Fetch the serialized vector store definition persisted at registration.
    serialized = await self.kvstore.get(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
    if not serialized:
        raise VectorStoreNotFoundError(vector_store_id)

    store = VectorStore.model_validate_json(serialized)
    faiss_index = await FaissIndex.create(store.embedding_dimension, self.kvstore, store.identifier)
    entry = VectorStoreWithIndex(
        vector_store=store,
        index=faiss_index,
        inference_api=self.inference_api,
    )

    # Remember the rebuilt index so later lookups skip the KV store.
    self.cache[vector_store_id] = entry
    return entry
|
||||||
|
|
||||||
async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
|
async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
|
||||||
index = self.cache.get(vector_store_id)
|
index = self.cache.get(vector_store_id)
|
||||||
if index is None:
|
if index is None:
|
||||||
|
|
|
||||||
|
|
@ -412,6 +412,14 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
|
||||||
return [v.vector_store for v in self.cache.values()]
|
return [v.vector_store for v in self.cache.values()]
|
||||||
|
|
||||||
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
||||||
|
if self.kvstore is None:
|
||||||
|
raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
|
||||||
|
|
||||||
|
# Save to kvstore for persistence
|
||||||
|
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
|
||||||
|
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
|
||||||
|
|
||||||
|
# Create and cache the index
|
||||||
index = await SQLiteVecIndex.create(
|
index = await SQLiteVecIndex.create(
|
||||||
vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
|
vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
|
||||||
)
|
)
|
||||||
|
|
@ -421,13 +429,16 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
|
||||||
if vector_store_id in self.cache:
|
if vector_store_id in self.cache:
|
||||||
return self.cache[vector_store_id]
|
return self.cache[vector_store_id]
|
||||||
|
|
||||||
if self.vector_store_table is None:
|
# Try to load from kvstore
|
||||||
raise VectorStoreNotFoundError(vector_store_id)
|
if self.kvstore is None:
|
||||||
|
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
|
||||||
vector_store = self.vector_store_table.get_vector_store(vector_store_id)
|
|
||||||
if not vector_store:
|
key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
|
||||||
|
vector_store_data = await self.kvstore.get(key)
|
||||||
|
if not vector_store_data:
|
||||||
raise VectorStoreNotFoundError(vector_store_id)
|
raise VectorStoreNotFoundError(vector_store_id)
|
||||||
|
|
||||||
|
vector_store = VectorStore.model_validate_json(vector_store_data)
|
||||||
index = VectorStoreWithIndex(
|
index = VectorStoreWithIndex(
|
||||||
vector_store=vector_store,
|
vector_store=vector_store,
|
||||||
index=SQLiteVecIndex(
|
index=SQLiteVecIndex(
|
||||||
|
|
|
||||||
|
|
@ -297,6 +297,20 @@ Available Models:
|
||||||
Azure OpenAI inference provider for accessing GPT models and other Azure services.
|
Azure OpenAI inference provider for accessing GPT models and other Azure services.
|
||||||
Provider documentation
|
Provider documentation
|
||||||
https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
|
https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
|
||||||
|
""",
|
||||||
|
),
|
||||||
|
RemoteProviderSpec(
|
||||||
|
api=Api.inference,
|
||||||
|
provider_type="remote::oci",
|
||||||
|
adapter_type="oci",
|
||||||
|
pip_packages=["oci"],
|
||||||
|
module="llama_stack.providers.remote.inference.oci",
|
||||||
|
config_class="llama_stack.providers.remote.inference.oci.config.OCIConfig",
|
||||||
|
provider_data_validator="llama_stack.providers.remote.inference.oci.config.OCIProviderDataValidator",
|
||||||
|
description="""
|
||||||
|
Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models.
|
||||||
|
Provider documentation
|
||||||
|
https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm
|
||||||
""",
|
""",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
|
||||||
17
src/llama_stack/providers/remote/inference/oci/__init__.py
Normal file
17
src/llama_stack/providers/remote/inference/oci/__init__.py
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
from llama_stack.apis.inference import InferenceProvider
|
||||||
|
|
||||||
|
from .config import OCIConfig
|
||||||
|
|
||||||
|
|
||||||
|
async def get_adapter_impl(config: OCIConfig, _deps) -> InferenceProvider:
    """Create the OCI inference adapter for ``config`` and initialize it.

    Args:
        config: OCI provider configuration.
        _deps: Unused by this provider.

    Returns:
        The initialized adapter.
    """
    # Imported here rather than at module level; the .oci module imports the OCI SDK.
    from .oci import OCIInferenceAdapter as _Adapter

    impl = _Adapter(config=config)
    await impl.initialize()
    return impl
|
||||||
79
src/llama_stack/providers/remote/inference/oci/auth.py
Normal file
79
src/llama_stack/providers/remote/inference/oci/auth.py
Normal file
|
|
@ -0,0 +1,79 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
from collections.abc import Generator, Mapping
|
||||||
|
from typing import Any, override
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import oci
|
||||||
|
import requests
|
||||||
|
from oci.config import DEFAULT_LOCATION, DEFAULT_PROFILE
|
||||||
|
|
||||||
|
# Type alias used to annotate the signer accepted by HttpxOciAuth.
# NOTE(review): `type[...]` denotes the signer *class*, while HttpxOciAuth calls
# `do_request_sign` on the stored value like an instance — confirm the intended annotation.
OciAuthSigner = type[oci.signer.AbstractBaseSigner]
|
||||||
|
|
||||||
|
|
||||||
|
class HttpxOciAuth(httpx.Auth):
    """
    Custom HTTPX authentication class that implements OCI request signing.

    This class handles the authentication flow for HTTPX requests by signing them
    using the OCI Signer, which adds the necessary authentication headers for
    OCI API calls.

    Attributes:
        signer (oci.signer.Signer): The OCI signer instance used for request signing
        requires_request_body (bool): Tells httpx to buffer the request body before
            ``auth_flow`` runs, since signing needs the full body
    """

    # Signing needs the complete request body. With this flag httpx reads the body
    # itself (sync or async, as appropriate) before invoking auth_flow, so streaming
    # bodies sent through an async client no longer depend on the synchronous
    # request.read() fallback below.
    requires_request_body = True

    def __init__(self, signer: OciAuthSigner):
        self.signer = signer

    @override
    def auth_flow(self, request: httpx.Request) -> Generator[httpx.Request, httpx.Response, None]:
        """Sign ``request`` with the OCI signer and yield it for sending.

        Args:
            request: The outgoing HTTPX request; its headers are updated in place.

        Yields:
            The same request, now carrying the signed headers.
        """
        # The body is normally already buffered (see requires_request_body); the
        # fallback only matters when auth_flow is driven directly with an unread
        # synchronous stream.
        try:
            content = request.content
        except httpx.RequestNotRead:
            content = request.read()

        # The OCI signer operates on `requests` prepared requests, so mirror the
        # HTTPX request into one.
        req = requests.Request(
            method=request.method,
            url=str(request.url),
            headers=dict(request.headers),
            data=content,
        )
        prepared_request = req.prepare()

        # Sign the request using the OCI Signer
        self.signer.do_request_sign(prepared_request)  # type: ignore

        # Update the original HTTPX request with the signed headers
        request.headers.update(prepared_request.headers)

        yield request
|
||||||
|
|
||||||
|
|
||||||
|
class OciInstancePrincipalAuth(HttpxOciAuth):
    """HTTPX auth that signs requests with an OCI instance-principal security token signer."""

    def __init__(self, **kwargs: Mapping[str, Any]):
        # All keyword arguments are forwarded unchanged to the OCI SDK signer.
        instance_signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner(**kwargs)
        self.signer = instance_signer
|
||||||
|
|
||||||
|
|
||||||
|
class OciUserPrincipalAuth(HttpxOciAuth):
    """HTTPX auth that signs requests with the API signing key of an OCI config-file profile."""

    def __init__(self, config_file: str = DEFAULT_LOCATION, profile_name: str = DEFAULT_PROFILE):
        """Load the profile and build a signer from its API key.

        Args:
            config_file: Path to the OCI config file.
            profile_name: Name of the profile to read from that file.
        """
        config = oci.config.from_file(config_file, profile_name)
        oci.config.validate_config(config)  # type: ignore
        with open(config["key_file"]) as f:
            key_content = f.read()

        self.signer = oci.signer.Signer(
            tenancy=config["tenancy"],
            user=config["user"],
            fingerprint=config["fingerprint"],
            private_key_file_location=config.get("key_file"),
            # Use the passphrase configured for this profile (None when the key is
            # unencrypted) instead of a hard-coded placeholder, so that
            # passphrase-protected keys can be loaded.
            pass_phrase=config.get("pass_phrase"),  # type: ignore
            private_key_content=key_content,
        )
|
||||||
75
src/llama_stack/providers/remote/inference/oci/config.py
Normal file
75
src/llama_stack/providers/remote/inference/oci/config.py
Normal file
|
|
@ -0,0 +1,75 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
import os
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
||||||
|
from llama_stack.schema_utils import json_schema_type
|
||||||
|
|
||||||
|
|
||||||
|
class OCIProviderDataValidator(BaseModel):
    # Schema for OCI provider data. The first three fields have no default and
    # are therefore required; the config-file fields default to the standard
    # OCI config location and profile.

    oci_auth_type: str = Field(
        description="OCI authentication type (must be one of: instance_principal, config_file)",
    )
    oci_region: str = Field(
        description="OCI region (e.g., us-ashburn-1)",
    )
    oci_compartment_id: str = Field(
        description="OCI compartment ID for the Generative AI service",
    )

    # Only meaningful for config_file authentication.
    oci_config_file_path: str | None = Field(
        description="OCI config file path (required if oci_auth_type is config_file)",
        default="~/.oci/config",
    )
    oci_config_profile: str | None = Field(
        description="OCI config profile (required if oci_auth_type is config_file)",
        default="DEFAULT",
    )
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
class OCIConfig(RemoteInferenceProviderConfig):
    # Run-time configuration of the OCI inference provider. Every field falls
    # back to an environment variable (read when the model is instantiated) and
    # then to a built-in default.

    oci_auth_type: str = Field(
        default_factory=lambda: os.environ.get("OCI_AUTH_TYPE", "instance_principal"),
        description="OCI authentication type (must be one of: instance_principal, config_file)",
    )
    oci_region: str = Field(
        description="OCI region (e.g., us-ashburn-1)",
        default_factory=lambda: os.environ.get("OCI_REGION", "us-ashburn-1"),
    )
    oci_compartment_id: str = Field(
        description="OCI compartment ID for the Generative AI service",
        default_factory=lambda: os.environ.get("OCI_COMPARTMENT_OCID", ""),
    )
    oci_config_file_path: str = Field(
        description="OCI config file path (required if oci_auth_type is config_file)",
        default_factory=lambda: os.environ.get("OCI_CONFIG_FILE_PATH", "~/.oci/config"),
    )
    oci_config_profile: str = Field(
        description="OCI config profile (required if oci_auth_type is config_file)",
        default_factory=lambda: os.environ.get("OCI_CLI_PROFILE", "DEFAULT"),
    )

    @classmethod
    def sample_run_config(
        cls,
        oci_auth_type: str = "${env.OCI_AUTH_TYPE:=instance_principal}",
        oci_config_file_path: str = "${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}",
        oci_config_profile: str = "${env.OCI_CLI_PROFILE:=DEFAULT}",
        oci_region: str = "${env.OCI_REGION:=us-ashburn-1}",
        oci_compartment_id: str = "${env.OCI_COMPARTMENT_OCID:=}",
        **kwargs,
    ) -> dict[str, Any]:
        """Return the sample provider config mapping.

        The defaults are ``${env.VAR:=default}`` placeholders. Extra keyword
        arguments are accepted and ignored.
        """
        return dict(
            oci_auth_type=oci_auth_type,
            oci_config_file_path=oci_config_file_path,
            oci_config_profile=oci_config_profile,
            oci_region=oci_region,
            oci_compartment_id=oci_compartment_id,
        )
|
||||||
140
src/llama_stack/providers/remote/inference/oci/oci.py
Normal file
140
src/llama_stack/providers/remote/inference/oci/oci.py
Normal file
|
|
@ -0,0 +1,140 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
|
||||||
|
from collections.abc import Iterable
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import oci
|
||||||
|
from oci.generative_ai.generative_ai_client import GenerativeAiClient
|
||||||
|
from oci.generative_ai.models import ModelCollection
|
||||||
|
from openai._base_client import DefaultAsyncHttpxClient
|
||||||
|
|
||||||
|
from llama_stack.apis.inference.inference import (
|
||||||
|
OpenAIEmbeddingsRequestWithExtraBody,
|
||||||
|
OpenAIEmbeddingsResponse,
|
||||||
|
)
|
||||||
|
from llama_stack.apis.models import ModelType
|
||||||
|
from llama_stack.log import get_logger
|
||||||
|
from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
|
||||||
|
from llama_stack.providers.remote.inference.oci.config import OCIConfig
|
||||||
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
|
|
||||||
|
logger = get_logger(name=__name__, category="inference::oci")

# Accepted values for OCIConfig.oci_auth_type; the adapter rejects anything else.
OCI_AUTH_TYPE_INSTANCE_PRINCIPAL = "instance_principal"
OCI_AUTH_TYPE_CONFIG_FILE = "config_file"
VALID_OCI_AUTH_TYPES = [OCI_AUTH_TYPE_INSTANCE_PRINCIPAL, OCI_AUTH_TYPE_CONFIG_FILE]
# Fallback region used when the config does not set oci_region.
DEFAULT_OCI_REGION = "us-ashburn-1"

# Capability filter passed to GenerativeAiClient.list_models during model discovery.
MODEL_CAPABILITIES = ["TEXT_GENERATION", "TEXT_SUMMARIZATION", "TEXT_EMBEDDINGS", "CHAT"]
|
||||||
|
|
||||||
|
|
||||||
|
class OCIInferenceAdapter(OpenAIMixin):
    """Inference adapter for the OCI Generative AI service, built on ``OpenAIMixin``.

    Requests are sent through the AsyncOpenAI client configured by
    ``get_base_url`` and ``get_extra_client_params``; model discovery goes
    through the OCI SDK's ``GenerativeAiClient``.
    """

    config: OCIConfig

    async def initialize(self) -> None:
        """Validate the OCI configuration.

        Raises:
            ValueError: If the auth type is not one of ``VALID_OCI_AUTH_TYPES``
                or no compartment OCID is configured.
        """
        if self.config.oci_auth_type not in VALID_OCI_AUTH_TYPES:
            raise ValueError(
                f"Invalid OCI authentication type: {self.config.oci_auth_type}. "
                f"Valid types are one of: {VALID_OCI_AUTH_TYPES}"
            )

        if not self.config.oci_compartment_id:
            raise ValueError("OCI_COMPARTMENT_OCID is a required parameter. Either set in env variable or config.")

    def get_base_url(self) -> str:
        """Return the regional inference endpoint, falling back to ``DEFAULT_OCI_REGION``."""
        region = self.config.oci_region or DEFAULT_OCI_REGION
        return f"https://inference.generativeai.{region}.oci.oraclecloud.com/20231130/actions/v1"

    def get_api_key(self) -> str | None:
        """Return a placeholder key; OCI authenticates by request signing, not API keys."""
        return "<NOTUSED>"

    def get_extra_client_params(self) -> dict[str, Any]:
        """
        Get extra parameters for the AsyncOpenAI client, including OCI-specific auth and headers.

        Returns:
            A dict with an ``http_client`` entry: an httpx client that applies the
            auth object from ``_get_auth`` and sends the ``CompartmentId`` header.
        """
        auth = self._get_auth()
        compartment_id = self.config.oci_compartment_id or ""

        return {
            "http_client": DefaultAsyncHttpxClient(
                auth=auth,
                headers={
                    "CompartmentId": compartment_id,
                },
            ),
        }

    def _get_oci_signer(self) -> oci.signer.AbstractBaseSigner | None:
        """Return an instance-principal signer, or ``None`` for other auth types."""
        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
            return oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
        return None

    def _get_oci_config(self) -> dict:
        """Build the OCI SDK config dict for the configured auth type.

        Raises:
            ValueError: If the config file profile has no region, or the auth
                type is not a supported one.
        """
        auth_type = self.config.oci_auth_type

        if auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
            # Same fallback as get_base_url so discovery and inference target one region.
            return {"region": self.config.oci_region or DEFAULT_OCI_REGION}

        if auth_type == OCI_AUTH_TYPE_CONFIG_FILE:
            config = oci.config.from_file(self.config.oci_config_file_path, self.config.oci_config_profile)
            if not config.get("region"):
                raise ValueError(
                    "Region not specified in config. Please specify in config or with OCI_REGION env variable."
                )
            return config

        # Previously fell through to `return config` with the name unbound (UnboundLocalError).
        raise ValueError(f"Invalid OCI authentication type: {self.config.oci_auth_type}")

    def _get_auth(self) -> httpx.Auth:
        """Return the httpx auth object matching the configured auth type.

        Raises:
            ValueError: If the auth type is not a supported one.
        """
        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
            return OciInstancePrincipalAuth()
        elif self.config.oci_auth_type == OCI_AUTH_TYPE_CONFIG_FILE:
            return OciUserPrincipalAuth(
                config_file=self.config.oci_config_file_path, profile_name=self.config.oci_config_profile
            )
        else:
            raise ValueError(f"Invalid OCI authentication type: {self.config.oci_auth_type}")

    def _list_active_models(self) -> list:
        """Synchronously fetch the ACTIVE models of the configured compartment via the OCI SDK."""
        oci_config = self._get_oci_config()
        oci_signer = self._get_oci_signer()
        compartment_id = self.config.oci_compartment_id or ""

        if oci_signer is None:
            client = GenerativeAiClient(config=oci_config)
        else:
            client = GenerativeAiClient(config=oci_config, signer=oci_signer)

        models: ModelCollection = client.list_models(
            compartment_id=compartment_id, capability=MODEL_CAPABILITIES, lifecycle_state="ACTIVE"
        ).data
        return models.items or []

    async def list_provider_model_ids(self) -> Iterable[str]:
        """
        List available models from OCI Generative AI service.

        Returns:
            Display names, in listing order and without duplicates, of models
            that have the CHAT capability, lack FINE_TUNE, and are neither
            deprecated nor retired for on-demand use.
        """
        import asyncio

        # The OCI SDK client is synchronous; run it in a worker thread so the
        # event loop is not blocked while the listing request is in flight.
        items = await asyncio.to_thread(self._list_active_models)

        seen_names: set[str] = set()
        model_ids: list[str] = []
        for model in items:
            if model.time_deprecated or model.time_on_demand_retired:
                continue

            # Guard against a missing capability list instead of failing on `in None`.
            capabilities = model.capabilities or []
            if "CHAT" not in capabilities or "FINE_TUNE" in capabilities:
                continue

            # Several listed entries can share a display name; keep the first only.
            if model.display_name in seen_names:
                continue

            seen_names.add(model.display_name)
            model_ids.append(model.display_name)

        return model_ids

    async def openai_embeddings(self, params: OpenAIEmbeddingsRequestWithExtraBody) -> OpenAIEmbeddingsResponse:
        """Always raises: the configured endpoint does not serve embeddings.

        Raises:
            NotImplementedError: On every call.
        """
        # The constructed URL fronts OCI's "chat" action, which is not supported for embeddings.
        raise NotImplementedError("OCI Provider does not (currently) support embeddings")
|
||||||
|
|
@ -131,7 +131,6 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
async def initialize(self) -> None:
|
||||||
self.kvstore = await kvstore_impl(self.config.persistence)
|
self.kvstore = await kvstore_impl(self.config.persistence)
|
||||||
self.vector_store_table = self.kvstore
|
|
||||||
|
|
||||||
if isinstance(self.config, RemoteChromaVectorIOConfig):
|
if isinstance(self.config, RemoteChromaVectorIOConfig):
|
||||||
log.info(f"Connecting to Chroma server at: {self.config.url}")
|
log.info(f"Connecting to Chroma server at: {self.config.url}")
|
||||||
|
|
@ -190,9 +189,16 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
||||||
if vector_store_id in self.cache:
|
if vector_store_id in self.cache:
|
||||||
return self.cache[vector_store_id]
|
return self.cache[vector_store_id]
|
||||||
|
|
||||||
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
|
# Try to load from kvstore
|
||||||
if not vector_store:
|
if self.kvstore is None:
|
||||||
|
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
|
||||||
|
|
||||||
|
key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
|
||||||
|
vector_store_data = await self.kvstore.get(key)
|
||||||
|
if not vector_store_data:
|
||||||
raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack")
|
raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack")
|
||||||
|
|
||||||
|
vector_store = VectorStore.model_validate_json(vector_store_data)
|
||||||
collection = await maybe_await(self.client.get_collection(vector_store_id))
|
collection = await maybe_await(self.client.get_collection(vector_store_id))
|
||||||
if not collection:
|
if not collection:
|
||||||
raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
|
raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
|
||||||
|
|
|
||||||
|
|
@ -328,13 +328,16 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
||||||
if vector_store_id in self.cache:
|
if vector_store_id in self.cache:
|
||||||
return self.cache[vector_store_id]
|
return self.cache[vector_store_id]
|
||||||
|
|
||||||
if self.vector_store_table is None:
|
# Try to load from kvstore
|
||||||
raise VectorStoreNotFoundError(vector_store_id)
|
if self.kvstore is None:
|
||||||
|
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
|
||||||
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
|
|
||||||
if not vector_store:
|
key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
|
||||||
|
vector_store_data = await self.kvstore.get(key)
|
||||||
|
if not vector_store_data:
|
||||||
raise VectorStoreNotFoundError(vector_store_id)
|
raise VectorStoreNotFoundError(vector_store_id)
|
||||||
|
|
||||||
|
vector_store = VectorStore.model_validate_json(vector_store_data)
|
||||||
index = VectorStoreWithIndex(
|
index = VectorStoreWithIndex(
|
||||||
vector_store=vector_store,
|
vector_store=vector_store,
|
||||||
index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore),
|
index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore),
|
||||||
|
|
|
||||||
|
|
@ -368,6 +368,22 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
|
||||||
log.exception("Could not connect to PGVector database server")
|
log.exception("Could not connect to PGVector database server")
|
||||||
raise RuntimeError("Could not connect to PGVector database server") from e
|
raise RuntimeError("Could not connect to PGVector database server") from e
|
||||||
|
|
||||||
|
# Load existing vector stores from KV store into cache
|
||||||
|
start_key = VECTOR_DBS_PREFIX
|
||||||
|
end_key = f"{VECTOR_DBS_PREFIX}\xff"
|
||||||
|
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
|
||||||
|
for vector_store_data in stored_vector_stores:
|
||||||
|
vector_store = VectorStore.model_validate_json(vector_store_data)
|
||||||
|
pgvector_index = PGVectorIndex(
|
||||||
|
vector_store=vector_store,
|
||||||
|
dimension=vector_store.embedding_dimension,
|
||||||
|
conn=self.conn,
|
||||||
|
kvstore=self.kvstore,
|
||||||
|
)
|
||||||
|
await pgvector_index.initialize()
|
||||||
|
index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api)
|
||||||
|
self.cache[vector_store.identifier] = index
|
||||||
|
|
||||||
async def shutdown(self) -> None:
|
async def shutdown(self) -> None:
|
||||||
if self.conn is not None:
|
if self.conn is not None:
|
||||||
self.conn.close()
|
self.conn.close()
|
||||||
|
|
@ -377,7 +393,13 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
|
||||||
|
|
||||||
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
||||||
# Persist vector DB metadata in the KV store
|
# Persist vector DB metadata in the KV store
|
||||||
assert self.kvstore is not None
|
if self.kvstore is None:
|
||||||
|
raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
|
||||||
|
|
||||||
|
# Save to kvstore for persistence
|
||||||
|
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
|
||||||
|
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
|
||||||
|
|
||||||
# Upsert model metadata in Postgres
|
# Upsert model metadata in Postgres
|
||||||
upsert_models(self.conn, [(vector_store.identifier, vector_store)])
|
upsert_models(self.conn, [(vector_store.identifier, vector_store)])
|
||||||
|
|
||||||
|
|
@ -396,7 +418,8 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
|
||||||
del self.cache[vector_store_id]
|
del self.cache[vector_store_id]
|
||||||
|
|
||||||
# Delete vector DB metadata from KV store
|
# Delete vector DB metadata from KV store
|
||||||
assert self.kvstore is not None
|
if self.kvstore is None:
|
||||||
|
raise RuntimeError("KVStore not initialized. Call initialize() before unregistering vector stores.")
|
||||||
await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}")
|
await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}")
|
||||||
|
|
||||||
async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
|
async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
|
||||||
|
|
@ -413,13 +436,16 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
|
||||||
if vector_store_id in self.cache:
|
if vector_store_id in self.cache:
|
||||||
return self.cache[vector_store_id]
|
return self.cache[vector_store_id]
|
||||||
|
|
||||||
if self.vector_store_table is None:
|
# Try to load from kvstore
|
||||||
raise VectorStoreNotFoundError(vector_store_id)
|
if self.kvstore is None:
|
||||||
|
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
|
||||||
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
|
|
||||||
if not vector_store:
|
key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
|
||||||
|
vector_store_data = await self.kvstore.get(key)
|
||||||
|
if not vector_store_data:
|
||||||
raise VectorStoreNotFoundError(vector_store_id)
|
raise VectorStoreNotFoundError(vector_store_id)
|
||||||
|
|
||||||
|
vector_store = VectorStore.model_validate_json(vector_store_data)
|
||||||
index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn)
|
index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn)
|
||||||
await index.initialize()
|
await index.initialize()
|
||||||
self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api)
|
self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api)
|
||||||
|
|
|
||||||
|
|
@ -183,7 +183,8 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
||||||
await super().shutdown()
|
await super().shutdown()
|
||||||
|
|
||||||
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
||||||
assert self.kvstore is not None
|
if self.kvstore is None:
|
||||||
|
raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
|
||||||
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
|
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
|
||||||
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
|
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
|
||||||
|
|
||||||
|
|
@ -200,20 +201,24 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
||||||
await self.cache[vector_store_id].index.delete()
|
await self.cache[vector_store_id].index.delete()
|
||||||
del self.cache[vector_store_id]
|
del self.cache[vector_store_id]
|
||||||
|
|
||||||
assert self.kvstore is not None
|
if self.kvstore is None:
|
||||||
|
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
|
||||||
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
|
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
|
||||||
|
|
||||||
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
|
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
|
||||||
if vector_store_id in self.cache:
|
if vector_store_id in self.cache:
|
||||||
return self.cache[vector_store_id]
|
return self.cache[vector_store_id]
|
||||||
|
|
||||||
if self.vector_store_table is None:
|
# Try to load from kvstore
|
||||||
raise ValueError(f"Vector DB not found {vector_store_id}")
|
if self.kvstore is None:
|
||||||
|
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
|
||||||
|
|
||||||
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
|
key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
|
||||||
if not vector_store:
|
vector_store_data = await self.kvstore.get(key)
|
||||||
|
if not vector_store_data:
|
||||||
raise VectorStoreNotFoundError(vector_store_id)
|
raise VectorStoreNotFoundError(vector_store_id)
|
||||||
|
|
||||||
|
vector_store = VectorStore.model_validate_json(vector_store_data)
|
||||||
index = VectorStoreWithIndex(
|
index = VectorStoreWithIndex(
|
||||||
vector_store=vector_store,
|
vector_store=vector_store,
|
||||||
index=QdrantIndex(client=self.client, collection_name=vector_store.identifier),
|
index=QdrantIndex(client=self.client, collection_name=vector_store.identifier),
|
||||||
|
|
|
||||||
|
|
@ -346,13 +346,16 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
|
||||||
if vector_store_id in self.cache:
|
if vector_store_id in self.cache:
|
||||||
return self.cache[vector_store_id]
|
return self.cache[vector_store_id]
|
||||||
|
|
||||||
if self.vector_store_table is None:
|
# Try to load from kvstore
|
||||||
raise VectorStoreNotFoundError(vector_store_id)
|
if self.kvstore is None:
|
||||||
|
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
|
||||||
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
|
|
||||||
if not vector_store:
|
key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
|
||||||
|
vector_store_data = await self.kvstore.get(key)
|
||||||
|
if not vector_store_data:
|
||||||
raise VectorStoreNotFoundError(vector_store_id)
|
raise VectorStoreNotFoundError(vector_store_id)
|
||||||
|
|
||||||
|
vector_store = VectorStore.model_validate_json(vector_store_data)
|
||||||
client = self._get_client()
|
client = self._get_client()
|
||||||
sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True)
|
sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True)
|
||||||
if not client.collections.exists(sanitized_collection_name):
|
if not client.collections.exists(sanitized_collection_name):
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ from llama_stack.apis.vector_io import (
|
||||||
VectorStoreContent,
|
VectorStoreContent,
|
||||||
VectorStoreDeleteResponse,
|
VectorStoreDeleteResponse,
|
||||||
VectorStoreFileBatchObject,
|
VectorStoreFileBatchObject,
|
||||||
VectorStoreFileContentsResponse,
|
VectorStoreFileContentResponse,
|
||||||
VectorStoreFileCounts,
|
VectorStoreFileCounts,
|
||||||
VectorStoreFileDeleteResponse,
|
VectorStoreFileDeleteResponse,
|
||||||
VectorStoreFileLastError,
|
VectorStoreFileLastError,
|
||||||
|
|
@ -921,22 +921,21 @@ class OpenAIVectorStoreMixin(ABC):
|
||||||
self,
|
self,
|
||||||
vector_store_id: str,
|
vector_store_id: str,
|
||||||
file_id: str,
|
file_id: str,
|
||||||
) -> VectorStoreFileContentsResponse:
|
) -> VectorStoreFileContentResponse:
|
||||||
"""Retrieves the contents of a vector store file."""
|
"""Retrieves the contents of a vector store file."""
|
||||||
if vector_store_id not in self.openai_vector_stores:
|
if vector_store_id not in self.openai_vector_stores:
|
||||||
raise VectorStoreNotFoundError(vector_store_id)
|
raise VectorStoreNotFoundError(vector_store_id)
|
||||||
|
|
||||||
file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
|
|
||||||
dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
|
dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
|
||||||
chunks = [Chunk.model_validate(c) for c in dict_chunks]
|
chunks = [Chunk.model_validate(c) for c in dict_chunks]
|
||||||
content = []
|
content = []
|
||||||
for chunk in chunks:
|
for chunk in chunks:
|
||||||
content.extend(self._chunk_to_vector_store_content(chunk))
|
content.extend(self._chunk_to_vector_store_content(chunk))
|
||||||
return VectorStoreFileContentsResponse(
|
return VectorStoreFileContentResponse(
|
||||||
file_id=file_id,
|
object="vector_store.file_content.page",
|
||||||
filename=file_info.get("filename", ""),
|
data=content,
|
||||||
attributes=file_info.get("attributes", {}),
|
has_more=False,
|
||||||
content=content,
|
next_page=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def openai_update_vector_store_file(
|
async def openai_update_vector_store_file(
|
||||||
|
|
|
||||||
|
|
@ -54,6 +54,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
|
||||||
# {"error":{"message":"Unknown request URL: GET /openai/v1/completions. Please check the URL for typos,
|
# {"error":{"message":"Unknown request URL: GET /openai/v1/completions. Please check the URL for typos,
|
||||||
# or see the docs at https://console.groq.com/docs/","type":"invalid_request_error","code":"unknown_url"}}
|
# or see the docs at https://console.groq.com/docs/","type":"invalid_request_error","code":"unknown_url"}}
|
||||||
"remote::groq",
|
"remote::groq",
|
||||||
|
"remote::oci",
|
||||||
"remote::gemini", # https://generativelanguage.googleapis.com/v1beta/openai/completions -> 404
|
"remote::gemini", # https://generativelanguage.googleapis.com/v1beta/openai/completions -> 404
|
||||||
"remote::anthropic", # at least claude-3-{5,7}-{haiku,sonnet}-* / claude-{sonnet,opus}-4-* are not supported
|
"remote::anthropic", # at least claude-3-{5,7}-{haiku,sonnet}-* / claude-{sonnet,opus}-4-* are not supported
|
||||||
"remote::azure", # {'error': {'code': 'OperationNotSupported', 'message': 'The completion operation
|
"remote::azure", # {'error': {'code': 'OperationNotSupported', 'message': 'The completion operation
|
||||||
|
|
|
||||||
|
|
@ -138,6 +138,7 @@ def skip_if_model_doesnt_support_openai_embeddings(client, model_id):
|
||||||
"remote::runpod",
|
"remote::runpod",
|
||||||
"remote::sambanova",
|
"remote::sambanova",
|
||||||
"remote::tgi",
|
"remote::tgi",
|
||||||
|
"remote::oci",
|
||||||
):
|
):
|
||||||
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI embeddings.")
|
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI embeddings.")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -907,16 +907,16 @@ def test_openai_vector_store_retrieve_file_contents(
|
||||||
)
|
)
|
||||||
|
|
||||||
assert file_contents is not None
|
assert file_contents is not None
|
||||||
assert len(file_contents.content) == 1
|
assert file_contents.object == "vector_store.file_content.page"
|
||||||
content = file_contents.content[0]
|
assert len(file_contents.data) == 1
|
||||||
|
content = file_contents.data[0]
|
||||||
|
|
||||||
# llama-stack-client returns a model, openai-python is a badboy and returns a dict
|
# llama-stack-client returns a model, openai-python is a badboy and returns a dict
|
||||||
if not isinstance(content, dict):
|
if not isinstance(content, dict):
|
||||||
content = content.model_dump()
|
content = content.model_dump()
|
||||||
assert content["type"] == "text"
|
assert content["type"] == "text"
|
||||||
assert content["text"] == test_content.decode("utf-8")
|
assert content["text"] == test_content.decode("utf-8")
|
||||||
assert file_contents.filename == file_name
|
assert file_contents.has_more is False
|
||||||
assert file_contents.attributes == attributes
|
|
||||||
|
|
||||||
|
|
||||||
@vector_provider_wrapper
|
@vector_provider_wrapper
|
||||||
|
|
@ -1483,14 +1483,12 @@ def test_openai_vector_store_file_batch_retrieve_contents(
|
||||||
)
|
)
|
||||||
|
|
||||||
assert file_contents is not None
|
assert file_contents is not None
|
||||||
assert file_contents.filename == file_data[i][0]
|
assert file_contents.object == "vector_store.file_content.page"
|
||||||
assert len(file_contents.content) > 0
|
assert len(file_contents.data) > 0
|
||||||
|
|
||||||
# Verify the content matches what we uploaded
|
# Verify the content matches what we uploaded
|
||||||
content_text = (
|
content_text = (
|
||||||
file_contents.content[0].text
|
file_contents.data[0].text if hasattr(file_contents.data[0], "text") else file_contents.data[0]["text"]
|
||||||
if hasattr(file_contents.content[0], "text")
|
|
||||||
else file_contents.content[0]["text"]
|
|
||||||
)
|
)
|
||||||
assert file_data[i][1].decode("utf-8") in content_text
|
assert file_data[i][1].decode("utf-8") in content_text
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -92,6 +92,99 @@ async def test_persistence_across_adapter_restarts(vector_io_adapter):
|
||||||
await vector_io_adapter.shutdown()
|
await vector_io_adapter.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
async def test_vector_store_lazy_loading_from_kvstore(vector_io_adapter):
    """
    A vector store evicted from the in-memory cache is reloaded from the KV store.

    Registers a store, clears the cache, then checks that the index lookup
    rebuilds the entry on demand and that a second lookup returns the very
    same cached object.
    """
    adapter = vector_io_adapter
    await adapter.initialize()

    store_id = f"lazy_load_test_{np.random.randint(1e6)}"
    await adapter.register_vector_store(
        VectorStore(
            identifier=store_id,
            provider_id="test_provider",
            embedding_model="test_model",
            embedding_dimension=128,
        )
    )
    assert store_id in adapter.cache

    # Drop the cached entry so the next lookup has to go to persistent storage.
    adapter.cache.clear()
    assert store_id not in adapter.cache

    first_lookup = await adapter._get_and_cache_vector_store_index(store_id)
    assert first_lookup is not None
    assert first_lookup.vector_store.identifier == store_id
    assert store_id in adapter.cache

    # A repeated lookup must be served from the cache, not rebuilt.
    second_lookup = await adapter._get_and_cache_vector_store_index(store_id)
    assert second_lookup is first_lookup

    await adapter.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
async def test_vector_store_preloading_on_initialization(vector_io_adapter):
    """
    Vector stores persisted in the KV store are loaded into the cache by initialize().

    Registers three stores, restarts the adapter (shutdown followed by
    initialize), and checks that every store is already cached and can be
    retrieved through the index lookup.
    """
    adapter = vector_io_adapter
    await adapter.initialize()

    store_ids = []
    for n in range(3):
        store_ids.append(f"preload_test_{n}_{np.random.randint(1e6)}")

    for store_id in store_ids:
        await adapter.register_vector_store(
            VectorStore(
                identifier=store_id,
                provider_id="test_provider",
                embedding_model="test_model",
                embedding_dimension=128,
            )
        )

    for store_id in store_ids:
        assert store_id in adapter.cache

    # Simulate a restart of the adapter.
    await adapter.shutdown()
    await adapter.initialize()

    for store_id in store_ids:
        assert store_id in adapter.cache

    for store_id in store_ids:
        index = await adapter._get_and_cache_vector_store_index(store_id)
        assert index is not None
        assert index.vector_store.identifier == store_id

    await adapter.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
async def test_kvstore_none_raises_runtime_error(vector_io_adapter):
    """
    Looking up a vector store while the kvstore is None raises RuntimeError.

    The store is registered first, then both the cache and the kvstore are
    removed so the lookup has to take the KV store path and hit the
    "KVStore not initialized" guard.
    """
    adapter = vector_io_adapter
    await adapter.initialize()

    store_id = f"kvstore_none_test_{np.random.randint(1e6)}"
    await adapter.register_vector_store(
        VectorStore(
            identifier=store_id,
            provider_id="test_provider",
            embedding_model="test_model",
            embedding_dimension=128,
        )
    )

    # Empty the cache so the lookup cannot short-circuit, then remove the kvstore.
    adapter.cache.clear()
    adapter.kvstore = None

    with pytest.raises(RuntimeError, match="KVStore not initialized"):
        await adapter._get_and_cache_vector_store_index(store_id)
|
||||||
|
|
||||||
|
|
||||||
async def test_register_and_unregister_vector_store(vector_io_adapter):
|
async def test_register_and_unregister_vector_store(vector_io_adapter):
|
||||||
unique_id = f"foo_db_{np.random.randint(1e6)}"
|
unique_id = f"foo_db_{np.random.randint(1e6)}"
|
||||||
dummy = VectorStore(
|
dummy = VectorStore(
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue