forked from phoenix-oss/llama-stack-mirror
Remove quantization_config
from the APIs for now
This commit is contained in:
parent
ab0a24f333
commit
863bb915e1
5 changed files with 18 additions and 157 deletions
|
@ -151,8 +151,6 @@ class AgenticSystemInstanceConfig(BaseModel):
|
||||||
input_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list)
|
input_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list)
|
||||||
output_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list)
|
output_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list)
|
||||||
|
|
||||||
quantization_config: Optional[QuantizationConfig] = None
|
|
||||||
|
|
||||||
# if you completely want to replace the messages prefixed by the system,
|
# if you completely want to replace the messages prefixed by the system,
|
||||||
# this is debug only
|
# this is debug only
|
||||||
debug_prefix_messages: Optional[List[Message]] = Field(default_factory=list)
|
debug_prefix_messages: Optional[List[Message]] = Field(default_factory=list)
|
||||||
|
|
|
@ -135,7 +135,6 @@ async def run_main(host: str, port: int):
|
||||||
available_tools=tool_definitions,
|
available_tools=tool_definitions,
|
||||||
input_shields=[],
|
input_shields=[],
|
||||||
output_shields=[],
|
output_shields=[],
|
||||||
quantization_config=None,
|
|
||||||
debug_prefix_messages=[],
|
debug_prefix_messages=[],
|
||||||
tool_prompt_format=ToolPromptFormat.json,
|
tool_prompt_format=ToolPromptFormat.json,
|
||||||
),
|
),
|
||||||
|
|
|
@ -19,7 +19,6 @@ class CompletionRequest(BaseModel):
|
||||||
|
|
||||||
stream: Optional[bool] = False
|
stream: Optional[bool] = False
|
||||||
logprobs: Optional[LogProbConfig] = None
|
logprobs: Optional[LogProbConfig] = None
|
||||||
quantization_config: Optional[QuantizationConfig] = None
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
|
@ -43,7 +42,6 @@ class BatchCompletionRequest(BaseModel):
|
||||||
content_batch: List[InterleavedTextAttachment]
|
content_batch: List[InterleavedTextAttachment]
|
||||||
sampling_params: Optional[SamplingParams] = SamplingParams()
|
sampling_params: Optional[SamplingParams] = SamplingParams()
|
||||||
logprobs: Optional[LogProbConfig] = None
|
logprobs: Optional[LogProbConfig] = None
|
||||||
quantization_config: Optional[QuantizationConfig] = None
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
|
@ -62,7 +60,6 @@ class ChatCompletionRequest(BaseModel):
|
||||||
|
|
||||||
stream: Optional[bool] = False
|
stream: Optional[bool] = False
|
||||||
logprobs: Optional[LogProbConfig] = None
|
logprobs: Optional[LogProbConfig] = None
|
||||||
quantization_config: Optional[QuantizationConfig] = None
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
|
@ -88,7 +85,6 @@ class BatchChatCompletionRequest(BaseModel):
|
||||||
available_tools: Optional[List[ToolDefinition]] = Field(default_factory=list)
|
available_tools: Optional[List[ToolDefinition]] = Field(default_factory=list)
|
||||||
|
|
||||||
logprobs: Optional[LogProbConfig] = None
|
logprobs: Optional[LogProbConfig] = None
|
||||||
quantization_config: Optional[QuantizationConfig] = None
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
|
|
|
@ -21,7 +21,7 @@
|
||||||
"info": {
|
"info": {
|
||||||
"title": "[DRAFT] Llama Stack Specification",
|
"title": "[DRAFT] Llama Stack Specification",
|
||||||
"version": "0.0.1",
|
"version": "0.0.1",
|
||||||
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-08-20 19:00:39.110138"
|
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-08-21 14:16:38.313950"
|
||||||
},
|
},
|
||||||
"servers": [
|
"servers": [
|
||||||
{
|
{
|
||||||
|
@ -1760,16 +1760,6 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false
|
"additionalProperties": false
|
||||||
},
|
|
||||||
"quantization_config": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/Bf16QuantizationConfig"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/Fp8QuantizationConfig"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -1778,19 +1768,6 @@
|
||||||
"messages_batch"
|
"messages_batch"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"Bf16QuantizationConfig": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "bf16"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"type"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"BuiltinTool": {
|
"BuiltinTool": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": [
|
"enum": [
|
||||||
|
@ -1848,19 +1825,6 @@
|
||||||
"tool_calls"
|
"tool_calls"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"Fp8QuantizationConfig": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "fp8"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"type"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"SamplingParams": {
|
"SamplingParams": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -2229,16 +2193,6 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false
|
"additionalProperties": false
|
||||||
},
|
|
||||||
"quantization_config": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/Bf16QuantizationConfig"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/Fp8QuantizationConfig"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -2307,16 +2261,6 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false
|
"additionalProperties": false
|
||||||
},
|
|
||||||
"quantization_config": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/Bf16QuantizationConfig"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/Fp8QuantizationConfig"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -2469,16 +2413,6 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false
|
"additionalProperties": false
|
||||||
},
|
|
||||||
"quantization_config": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/Bf16QuantizationConfig"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/Fp8QuantizationConfig"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -2552,16 +2486,6 @@
|
||||||
"$ref": "#/components/schemas/ShieldDefinition"
|
"$ref": "#/components/schemas/ShieldDefinition"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"quantization_config": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/Bf16QuantizationConfig"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/Fp8QuantizationConfig"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"debug_prefix_messages": {
|
"debug_prefix_messages": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
|
@ -4782,30 +4706,30 @@
|
||||||
{
|
{
|
||||||
"name": "RewardScoring"
|
"name": "RewardScoring"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"name": "AgenticSystem"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "SyntheticDataGeneration"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Inference"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"name": "Datasets"
|
"name": "Datasets"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Observability"
|
"name": "Observability"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "AgenticSystem"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Inference"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Evaluations"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "SyntheticDataGeneration"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "PostTraining"
|
"name": "PostTraining"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "MemoryBanks"
|
"name": "MemoryBanks"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"name": "Evaluations"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"name": "Attachment",
|
"name": "Attachment",
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/Attachment\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/Attachment\" />"
|
||||||
|
@ -4814,10 +4738,6 @@
|
||||||
"name": "BatchChatCompletionRequest",
|
"name": "BatchChatCompletionRequest",
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BatchChatCompletionRequest\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BatchChatCompletionRequest\" />"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"name": "Bf16QuantizationConfig",
|
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/Bf16QuantizationConfig\" />"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"name": "BuiltinTool",
|
"name": "BuiltinTool",
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />"
|
||||||
|
@ -4826,10 +4746,6 @@
|
||||||
"name": "CompletionMessage",
|
"name": "CompletionMessage",
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/CompletionMessage\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/CompletionMessage\" />"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"name": "Fp8QuantizationConfig",
|
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/Fp8QuantizationConfig\" />"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"name": "SamplingParams",
|
"name": "SamplingParams",
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/SamplingParams\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/SamplingParams\" />"
|
||||||
|
@ -5245,7 +5161,6 @@
|
||||||
"BatchChatCompletionResponse",
|
"BatchChatCompletionResponse",
|
||||||
"BatchCompletionRequest",
|
"BatchCompletionRequest",
|
||||||
"BatchCompletionResponse",
|
"BatchCompletionResponse",
|
||||||
"Bf16QuantizationConfig",
|
|
||||||
"BuiltinShield",
|
"BuiltinShield",
|
||||||
"BuiltinTool",
|
"BuiltinTool",
|
||||||
"ChatCompletionRequest",
|
"ChatCompletionRequest",
|
||||||
|
@ -5272,7 +5187,6 @@
|
||||||
"Experiment",
|
"Experiment",
|
||||||
"ExperimentStatus",
|
"ExperimentStatus",
|
||||||
"FinetuningAlgorithm",
|
"FinetuningAlgorithm",
|
||||||
"Fp8QuantizationConfig",
|
|
||||||
"InferenceStep",
|
"InferenceStep",
|
||||||
"Log",
|
"Log",
|
||||||
"LogMessagesRequest",
|
"LogMessagesRequest",
|
||||||
|
|
|
@ -45,10 +45,6 @@ components:
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/ShieldDefinition'
|
$ref: '#/components/schemas/ShieldDefinition'
|
||||||
type: array
|
type: array
|
||||||
quantization_config:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/Bf16QuantizationConfig'
|
|
||||||
- $ref: '#/components/schemas/Fp8QuantizationConfig'
|
|
||||||
sampling_params:
|
sampling_params:
|
||||||
$ref: '#/components/schemas/SamplingParams'
|
$ref: '#/components/schemas/SamplingParams'
|
||||||
tool_prompt_format:
|
tool_prompt_format:
|
||||||
|
@ -216,10 +212,6 @@ components:
|
||||||
type: array
|
type: array
|
||||||
model:
|
model:
|
||||||
type: string
|
type: string
|
||||||
quantization_config:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/Bf16QuantizationConfig'
|
|
||||||
- $ref: '#/components/schemas/Fp8QuantizationConfig'
|
|
||||||
sampling_params:
|
sampling_params:
|
||||||
$ref: '#/components/schemas/SamplingParams'
|
$ref: '#/components/schemas/SamplingParams'
|
||||||
required:
|
required:
|
||||||
|
@ -258,10 +250,6 @@ components:
|
||||||
type: object
|
type: object
|
||||||
model:
|
model:
|
||||||
type: string
|
type: string
|
||||||
quantization_config:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/Bf16QuantizationConfig'
|
|
||||||
- $ref: '#/components/schemas/Fp8QuantizationConfig'
|
|
||||||
sampling_params:
|
sampling_params:
|
||||||
$ref: '#/components/schemas/SamplingParams'
|
$ref: '#/components/schemas/SamplingParams'
|
||||||
required:
|
required:
|
||||||
|
@ -278,15 +266,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- completion_message_batch
|
- completion_message_batch
|
||||||
type: object
|
type: object
|
||||||
Bf16QuantizationConfig:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
const: bf16
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
type: object
|
|
||||||
BuiltinShield:
|
BuiltinShield:
|
||||||
enum:
|
enum:
|
||||||
- llama_guard
|
- llama_guard
|
||||||
|
@ -325,10 +304,6 @@ components:
|
||||||
type: array
|
type: array
|
||||||
model:
|
model:
|
||||||
type: string
|
type: string
|
||||||
quantization_config:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/Bf16QuantizationConfig'
|
|
||||||
- $ref: '#/components/schemas/Fp8QuantizationConfig'
|
|
||||||
sampling_params:
|
sampling_params:
|
||||||
$ref: '#/components/schemas/SamplingParams'
|
$ref: '#/components/schemas/SamplingParams'
|
||||||
stream:
|
stream:
|
||||||
|
@ -421,10 +396,6 @@ components:
|
||||||
type: object
|
type: object
|
||||||
model:
|
model:
|
||||||
type: string
|
type: string
|
||||||
quantization_config:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/Bf16QuantizationConfig'
|
|
||||||
- $ref: '#/components/schemas/Fp8QuantizationConfig'
|
|
||||||
sampling_params:
|
sampling_params:
|
||||||
$ref: '#/components/schemas/SamplingParams'
|
$ref: '#/components/schemas/SamplingParams'
|
||||||
stream:
|
stream:
|
||||||
|
@ -717,15 +688,6 @@ components:
|
||||||
- qlora
|
- qlora
|
||||||
- dora
|
- dora
|
||||||
type: string
|
type: string
|
||||||
Fp8QuantizationConfig:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
const: fp8
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
type: object
|
|
||||||
InferenceStep:
|
InferenceStep:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
properties:
|
properties:
|
||||||
|
@ -1867,7 +1829,7 @@ info:
|
||||||
description: "This is the specification of the llama stack that provides\n \
|
description: "This is the specification of the llama stack that provides\n \
|
||||||
\ a set of endpoints and their corresponding interfaces that are tailored\
|
\ a set of endpoints and their corresponding interfaces that are tailored\
|
||||||
\ to\n best leverage Llama Models. The specification is still in\
|
\ to\n best leverage Llama Models. The specification is still in\
|
||||||
\ draft and subject to change.\n Generated at 2024-08-20 19:00:39.110138"
|
\ draft and subject to change.\n Generated at 2024-08-21 14:16:38.313950"
|
||||||
title: '[DRAFT] Llama Stack Specification'
|
title: '[DRAFT] Llama Stack Specification'
|
||||||
version: 0.0.1
|
version: 0.0.1
|
||||||
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
|
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
|
||||||
|
@ -2871,30 +2833,24 @@ servers:
|
||||||
- url: http://any-hosted-llama-stack.com
|
- url: http://any-hosted-llama-stack.com
|
||||||
tags:
|
tags:
|
||||||
- name: RewardScoring
|
- name: RewardScoring
|
||||||
- name: AgenticSystem
|
|
||||||
- name: SyntheticDataGeneration
|
|
||||||
- name: Inference
|
|
||||||
- name: Datasets
|
- name: Datasets
|
||||||
- name: Observability
|
- name: Observability
|
||||||
|
- name: AgenticSystem
|
||||||
|
- name: Inference
|
||||||
|
- name: Evaluations
|
||||||
|
- name: SyntheticDataGeneration
|
||||||
- name: PostTraining
|
- name: PostTraining
|
||||||
- name: MemoryBanks
|
- name: MemoryBanks
|
||||||
- name: Evaluations
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/Attachment" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/Attachment" />
|
||||||
name: Attachment
|
name: Attachment
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
|
||||||
/>
|
/>
|
||||||
name: BatchChatCompletionRequest
|
name: BatchChatCompletionRequest
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/Bf16QuantizationConfig"
|
|
||||||
/>
|
|
||||||
name: Bf16QuantizationConfig
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
|
||||||
name: BuiltinTool
|
name: BuiltinTool
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
|
||||||
/>
|
/>
|
||||||
name: CompletionMessage
|
name: CompletionMessage
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/Fp8QuantizationConfig"
|
|
||||||
/>
|
|
||||||
name: Fp8QuantizationConfig
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" />
|
||||||
name: SamplingParams
|
name: SamplingParams
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy"
|
||||||
|
@ -3252,7 +3208,6 @@ x-tagGroups:
|
||||||
- BatchChatCompletionResponse
|
- BatchChatCompletionResponse
|
||||||
- BatchCompletionRequest
|
- BatchCompletionRequest
|
||||||
- BatchCompletionResponse
|
- BatchCompletionResponse
|
||||||
- Bf16QuantizationConfig
|
|
||||||
- BuiltinShield
|
- BuiltinShield
|
||||||
- BuiltinTool
|
- BuiltinTool
|
||||||
- ChatCompletionRequest
|
- ChatCompletionRequest
|
||||||
|
@ -3279,7 +3234,6 @@ x-tagGroups:
|
||||||
- Experiment
|
- Experiment
|
||||||
- ExperimentStatus
|
- ExperimentStatus
|
||||||
- FinetuningAlgorithm
|
- FinetuningAlgorithm
|
||||||
- Fp8QuantizationConfig
|
|
||||||
- InferenceStep
|
- InferenceStep
|
||||||
- Log
|
- Log
|
||||||
- LogMessagesRequest
|
- LogMessagesRequest
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue