Remove quantization_config from the APIs for now

2025-12-05 10:23:44 +00:00 · 2024-08-21 14:17:05 -07:00 · 2024-08-21 14:17:05 -07:00 · 863bb915e1
commit 863bb915e1
parent ab0a24f333
5 changed files with 18 additions and 157 deletions
--- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
+++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
@@ -45,10 +45,6 @@ components:
          items:
            $ref: '#/components/schemas/ShieldDefinition'
          type: array
-        quantization_config:
-          oneOf:
-          - $ref: '#/components/schemas/Bf16QuantizationConfig'
-          - $ref: '#/components/schemas/Fp8QuantizationConfig'
        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
        tool_prompt_format:
@@ -216,10 +212,6 @@ components:
          type: array
        model:
          type: string
-        quantization_config:
-          oneOf:
-          - $ref: '#/components/schemas/Bf16QuantizationConfig'
-          - $ref: '#/components/schemas/Fp8QuantizationConfig'
        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
      required:
@@ -258,10 +250,6 @@ components:
          type: object
        model:
          type: string
-        quantization_config:
-          oneOf:
-          - $ref: '#/components/schemas/Bf16QuantizationConfig'
-          - $ref: '#/components/schemas/Fp8QuantizationConfig'
        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
      required:
@@ -278,15 +266,6 @@ components:
      required:
      - completion_message_batch
      type: object
-    Bf16QuantizationConfig:
-      additionalProperties: false
-      properties:
-        type:
-          const: bf16
-          type: string
-      required:
-      - type
-      type: object
    BuiltinShield:
      enum:
      - llama_guard
@@ -325,10 +304,6 @@ components:
          type: array
        model:
          type: string
-        quantization_config:
-          oneOf:
-          - $ref: '#/components/schemas/Bf16QuantizationConfig'
-          - $ref: '#/components/schemas/Fp8QuantizationConfig'
        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
        stream:
@@ -421,10 +396,6 @@ components:
          type: object
        model:
          type: string
-        quantization_config:
-          oneOf:
-          - $ref: '#/components/schemas/Bf16QuantizationConfig'
-          - $ref: '#/components/schemas/Fp8QuantizationConfig'
        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
        stream:
@@ -717,15 +688,6 @@ components:
      - qlora
      - dora
      type: string
-    Fp8QuantizationConfig:
-      additionalProperties: false
-      properties:
-        type:
-          const: fp8
-          type: string
-      required:
-      - type
-      type: object
    InferenceStep:
      additionalProperties: false
      properties:
@@ -1867,7 +1829,7 @@ info:
  description: "This is the specification of the llama stack that provides\n     \
    \           a set of endpoints and their corresponding interfaces that are tailored\
    \ to\n                best leverage Llama Models. The specification is still in\
-    \ draft and subject to change.\n                Generated at 2024-08-20 19:00:39.110138"
+    \ draft and subject to change.\n                Generated at 2024-08-21 14:16:38.313950"
  title: '[DRAFT] Llama Stack Specification'
  version: 0.0.1
 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -2871,30 +2833,24 @@ servers:
 - url: http://any-hosted-llama-stack.com
 tags:
 - name: RewardScoring
- name: AgenticSystem
- name: SyntheticDataGeneration
- name: Inference
 - name: Datasets
 - name: Observability
+- name: AgenticSystem
+- name: Inference
+- name: Evaluations
+- name: SyntheticDataGeneration
 - name: PostTraining
 - name: MemoryBanks
- name: Evaluations
 - description: <SchemaDefinition schemaRef="#/components/schemas/Attachment" />
  name: Attachment
 - description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
    />
  name: BatchChatCompletionRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/Bf16QuantizationConfig"
-    />
-  name: Bf16QuantizationConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
  name: BuiltinTool
 - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
    />
  name: CompletionMessage
- description: <SchemaDefinition schemaRef="#/components/schemas/Fp8QuantizationConfig"
-    />
-  name: Fp8QuantizationConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" />
  name: SamplingParams
 - description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy"
@@ -3252,7 +3208,6 @@ x-tagGroups:
  - BatchChatCompletionResponse
  - BatchCompletionRequest
  - BatchCompletionResponse
-  - Bf16QuantizationConfig
  - BuiltinShield
  - BuiltinTool
  - ChatCompletionRequest
@@ -3279,7 +3234,6 @@ x-tagGroups:
  - Experiment
  - ExperimentStatus
  - FinetuningAlgorithm
-  - Fp8QuantizationConfig
  - InferenceStep
  - Log
  - LogMessagesRequest