diff --git a/llama_toolchain/agentic_system/api/datatypes.py b/llama_toolchain/agentic_system/api/datatypes.py
index db4e40c4b..648aed698 100644
--- a/llama_toolchain/agentic_system/api/datatypes.py
+++ b/llama_toolchain/agentic_system/api/datatypes.py
@@ -151,8 +151,6 @@ class AgenticSystemInstanceConfig(BaseModel):
     input_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list)
     output_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list)
 
-    quantization_config: Optional[QuantizationConfig] = None
-
     # if you completely want to replace the messages prefixed by the system,
     # this is debug only
     debug_prefix_messages: Optional[List[Message]] = Field(default_factory=list)
diff --git a/llama_toolchain/agentic_system/client.py b/llama_toolchain/agentic_system/client.py
index 154bca614..56428c425 100644
--- a/llama_toolchain/agentic_system/client.py
+++ b/llama_toolchain/agentic_system/client.py
@@ -135,7 +135,6 @@ async def run_main(host: str, port: int):
             available_tools=tool_definitions,
             input_shields=[],
             output_shields=[],
-            quantization_config=None,
             debug_prefix_messages=[],
             tool_prompt_format=ToolPromptFormat.json,
         ),
diff --git a/llama_toolchain/inference/api/endpoints.py b/llama_toolchain/inference/api/endpoints.py
index a3ec18c95..ef1c7b159 100644
--- a/llama_toolchain/inference/api/endpoints.py
+++ b/llama_toolchain/inference/api/endpoints.py
@@ -19,7 +19,6 @@ class CompletionRequest(BaseModel):
 
     stream: Optional[bool] = False
     logprobs: Optional[LogProbConfig] = None
-    quantization_config: Optional[QuantizationConfig] = None
 
 
 @json_schema_type
@@ -43,7 +42,6 @@ class BatchCompletionRequest(BaseModel):
     content_batch: List[InterleavedTextAttachment]
     sampling_params: Optional[SamplingParams] = SamplingParams()
     logprobs: Optional[LogProbConfig] = None
-    quantization_config: Optional[QuantizationConfig] = None
 
 
 @json_schema_type
@@ -62,7 +60,6 @@ class ChatCompletionRequest(BaseModel):
 
     stream: Optional[bool] = False
     logprobs: Optional[LogProbConfig] = None
-    quantization_config: Optional[QuantizationConfig] = None
 
 
 @json_schema_type
@@ -88,7 +85,6 @@ class BatchChatCompletionRequest(BaseModel):
     available_tools: Optional[List[ToolDefinition]] = Field(default_factory=list)
 
     logprobs: Optional[LogProbConfig] = None
-    quantization_config: Optional[QuantizationConfig] = None
 
 
 @json_schema_type
diff --git a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html
index f59653edc..f8dab9ec3 100644
--- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html
+++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html
@@ -21,7 +21,7 @@
     "info": {
         "title": "[DRAFT] Llama Stack Specification",
         "version": "0.0.1",
-        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-08-20 19:00:39.110138"
+        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-08-21 14:16:38.313950"
     },
     "servers": [
         {
@@ -1760,16 +1760,6 @@
                             }
                         },
                         "additionalProperties": false
-                    },
-                    "quantization_config": {
-                        "oneOf": [
-                            {
-                                "$ref": "#/components/schemas/Bf16QuantizationConfig"
-                            },
-                            {
-                                "$ref": "#/components/schemas/Fp8QuantizationConfig"
-                            }
-                        ]
                     }
                 },
                 "additionalProperties": false,
@@ -1778,19 +1768,6 @@
                     "messages_batch"
                 ]
             },
-            "Bf16QuantizationConfig": {
-                "type": "object",
-                "properties": {
-                    "type": {
-                        "type": "string",
-                        "const": "bf16"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "type"
-                ]
-            },
             "BuiltinTool": {
                 "type": "string",
                 "enum": [
@@ -1848,19 +1825,6 @@
                     "tool_calls"
                 ]
             },
-            "Fp8QuantizationConfig": {
-                "type": "object",
-                "properties": {
-                    "type": {
-                        "type": "string",
-                        "const": "fp8"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "type"
-                ]
-            },
             "SamplingParams": {
                 "type": "object",
                 "properties": {
@@ -2229,16 +2193,6 @@
                             }
                         },
                         "additionalProperties": false
-                    },
-                    "quantization_config": {
-                        "oneOf": [
-                            {
-                                "$ref": "#/components/schemas/Bf16QuantizationConfig"
-                            },
-                            {
-                                "$ref": "#/components/schemas/Fp8QuantizationConfig"
-                            }
-                        ]
                     }
                 },
                 "additionalProperties": false,
@@ -2307,16 +2261,6 @@
                             }
                         },
                         "additionalProperties": false
-                    },
-                    "quantization_config": {
-                        "oneOf": [
-                            {
-                                "$ref": "#/components/schemas/Bf16QuantizationConfig"
-                            },
-                            {
-                                "$ref": "#/components/schemas/Fp8QuantizationConfig"
-                            }
-                        ]
                     }
                 },
                 "additionalProperties": false,
@@ -2469,16 +2413,6 @@
                             }
                         },
                         "additionalProperties": false
-                    },
-                    "quantization_config": {
-                        "oneOf": [
-                            {
-                                "$ref": "#/components/schemas/Bf16QuantizationConfig"
-                            },
-                            {
-                                "$ref": "#/components/schemas/Fp8QuantizationConfig"
-                            }
-                        ]
                     }
                 },
                 "additionalProperties": false,
@@ -2552,16 +2486,6 @@
                             "$ref": "#/components/schemas/ShieldDefinition"
                         }
                     },
-                    "quantization_config": {
-                        "oneOf": [
-                            {
-                                "$ref": "#/components/schemas/Bf16QuantizationConfig"
-                            },
-                            {
-                                "$ref": "#/components/schemas/Fp8QuantizationConfig"
-                            }
-                        ]
-                    },
                     "debug_prefix_messages": {
                         "type": "array",
                         "items": {
@@ -4782,30 +4706,30 @@
         {
             "name": "RewardScoring"
         },
-        {
-            "name": "AgenticSystem"
-        },
-        {
-            "name": "SyntheticDataGeneration"
-        },
-        {
-            "name": "Inference"
-        },
         {
             "name": "Datasets"
         },
         {
             "name": "Observability"
         },
+        {
+            "name": "AgenticSystem"
+        },
+        {
+            "name": "Inference"
+        },
+        {
+            "name": "Evaluations"
+        },
+        {
+            "name": "SyntheticDataGeneration"
+        },
         {
             "name": "PostTraining"
         },
         {
             "name": "MemoryBanks"
         },
-        {
-            "name": "Evaluations"
-        },
         {
             "name": "Attachment",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/Attachment\" />"
@@ -4814,10 +4738,6 @@
             "name": "BatchChatCompletionRequest",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BatchChatCompletionRequest\" />"
         },
-        {
-            "name": "Bf16QuantizationConfig",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/Bf16QuantizationConfig\" />"
-        },
         {
             "name": "BuiltinTool",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />"
@@ -4826,10 +4746,6 @@
             "name": "CompletionMessage",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/CompletionMessage\" />"
         },
-        {
-            "name": "Fp8QuantizationConfig",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/Fp8QuantizationConfig\" />"
-        },
         {
             "name": "SamplingParams",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/SamplingParams\" />"
@@ -5245,7 +5161,6 @@
                 "BatchChatCompletionResponse",
                 "BatchCompletionRequest",
                 "BatchCompletionResponse",
-                "Bf16QuantizationConfig",
                 "BuiltinShield",
                 "BuiltinTool",
                 "ChatCompletionRequest",
@@ -5272,7 +5187,6 @@
                 "Experiment",
                 "ExperimentStatus",
                 "FinetuningAlgorithm",
-                "Fp8QuantizationConfig",
                 "InferenceStep",
                 "Log",
                 "LogMessagesRequest",
diff --git a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
index 837036811..7cfb22669 100644
--- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
+++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
@@ -45,10 +45,6 @@ components:
           items:
             $ref: '#/components/schemas/ShieldDefinition'
           type: array
-        quantization_config:
-          oneOf:
-          - $ref: '#/components/schemas/Bf16QuantizationConfig'
-          - $ref: '#/components/schemas/Fp8QuantizationConfig'
         sampling_params:
           $ref: '#/components/schemas/SamplingParams'
         tool_prompt_format:
@@ -216,10 +212,6 @@ components:
           type: array
         model:
           type: string
-        quantization_config:
-          oneOf:
-          - $ref: '#/components/schemas/Bf16QuantizationConfig'
-          - $ref: '#/components/schemas/Fp8QuantizationConfig'
         sampling_params:
           $ref: '#/components/schemas/SamplingParams'
       required:
@@ -258,10 +250,6 @@ components:
           type: object
         model:
           type: string
-        quantization_config:
-          oneOf:
-          - $ref: '#/components/schemas/Bf16QuantizationConfig'
-          - $ref: '#/components/schemas/Fp8QuantizationConfig'
         sampling_params:
           $ref: '#/components/schemas/SamplingParams'
       required:
@@ -278,15 +266,6 @@ components:
       required:
       - completion_message_batch
       type: object
-    Bf16QuantizationConfig:
-      additionalProperties: false
-      properties:
-        type:
-          const: bf16
-          type: string
-      required:
-      - type
-      type: object
     BuiltinShield:
       enum:
       - llama_guard
@@ -325,10 +304,6 @@ components:
           type: array
         model:
           type: string
-        quantization_config:
-          oneOf:
-          - $ref: '#/components/schemas/Bf16QuantizationConfig'
-          - $ref: '#/components/schemas/Fp8QuantizationConfig'
         sampling_params:
           $ref: '#/components/schemas/SamplingParams'
         stream:
@@ -421,10 +396,6 @@ components:
           type: object
         model:
           type: string
-        quantization_config:
-          oneOf:
-          - $ref: '#/components/schemas/Bf16QuantizationConfig'
-          - $ref: '#/components/schemas/Fp8QuantizationConfig'
         sampling_params:
           $ref: '#/components/schemas/SamplingParams'
         stream:
@@ -717,15 +688,6 @@ components:
       - qlora
       - dora
       type: string
-    Fp8QuantizationConfig:
-      additionalProperties: false
-      properties:
-        type:
-          const: fp8
-          type: string
-      required:
-      - type
-      type: object
     InferenceStep:
       additionalProperties: false
       properties:
@@ -1867,7 +1829,7 @@ info:
   description: "This is the specification of the llama stack that provides\n     \
     \           a set of endpoints and their corresponding interfaces that are tailored\
     \ to\n                best leverage Llama Models. The specification is still in\
-    \ draft and subject to change.\n                Generated at 2024-08-20 19:00:39.110138"
+    \ draft and subject to change.\n                Generated at 2024-08-21 14:16:38.313950"
   title: '[DRAFT] Llama Stack Specification'
   version: 0.0.1
 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -2871,30 +2833,24 @@ servers:
 - url: http://any-hosted-llama-stack.com
 tags:
 - name: RewardScoring
-- name: AgenticSystem
-- name: SyntheticDataGeneration
-- name: Inference
 - name: Datasets
 - name: Observability
+- name: AgenticSystem
+- name: Inference
+- name: Evaluations
+- name: SyntheticDataGeneration
 - name: PostTraining
 - name: MemoryBanks
-- name: Evaluations
 - description: <SchemaDefinition schemaRef="#/components/schemas/Attachment" />
   name: Attachment
 - description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
     />
   name: BatchChatCompletionRequest
-- description: <SchemaDefinition schemaRef="#/components/schemas/Bf16QuantizationConfig"
-    />
-  name: Bf16QuantizationConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
   name: BuiltinTool
 - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
     />
   name: CompletionMessage
-- description: <SchemaDefinition schemaRef="#/components/schemas/Fp8QuantizationConfig"
-    />
-  name: Fp8QuantizationConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" />
   name: SamplingParams
 - description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy"
@@ -3252,7 +3208,6 @@ x-tagGroups:
   - BatchChatCompletionResponse
   - BatchCompletionRequest
   - BatchCompletionResponse
-  - Bf16QuantizationConfig
   - BuiltinShield
   - BuiltinTool
   - ChatCompletionRequest
@@ -3279,7 +3234,6 @@ x-tagGroups:
   - Experiment
   - ExperimentStatus
   - FinetuningAlgorithm
-  - Fp8QuantizationConfig
   - InferenceStep
   - Log
   - LogMessagesRequest