diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 709360ede..618153319 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -230,6 +230,41 @@
}
}
},
+ "/v1/jobs/{job_id}/cancel": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Jobs"
+ ],
+ "description": "",
+ "parameters": [
+ {
+ "name": "job_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/v1/post-training/job/cancel": {
"post": {
"responses": {
@@ -925,6 +960,81 @@
]
}
},
+ "/v1/jobs/{job_id}": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/JobInfo"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Jobs"
+ ],
+ "description": "",
+ "parameters": [
+ {
+ "name": "job_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "delete": {
+ "responses": {
+ "200": {
+ "description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Jobs"
+ ],
+ "description": "",
+ "parameters": [
+ {
+ "name": "job_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/v1/inference/embeddings": {
"post": {
"responses": {
@@ -2568,6 +2678,39 @@
]
}
},
+ "/v1/jobs": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ListJobsResponse"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Jobs"
+ ],
+ "description": "",
+ "parameters": []
+ }
+ },
"/v1/models": {
"get": {
"responses": {
@@ -4715,6 +4858,12 @@
"CompletionResponse": {
"type": "object",
"properties": {
+ "metrics": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/MetricEvent"
+ }
+ },
"content": {
"type": "string",
"description": "The generated completion text"
@@ -5082,6 +5231,12 @@
"CompletionResponseStreamChunk": {
"type": "object",
"properties": {
+ "metrics": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/MetricEvent"
+ }
+ },
"delta": {
"type": "string",
"description": "New content generated since last chunk. This can be one or more tokens."
@@ -7094,6 +7249,73 @@
],
"title": "UnionType"
},
+ "JobArtifact": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string"
+ },
+ "uri": {
+ "type": "string"
+ },
+ "metadata": {
+ "type": "object",
+ "title": "dict",
+ "description": "dict() -> new empty dictionary dict(mapping) -> new dictionary initialized from a mapping object's (key, value) pairs dict(iterable) -> new dictionary initialized as if via: d = {} for k, v in iterable: d[k] = v dict(**kwargs) -> new dictionary initialized with the name=value pairs in the keyword argument list. For example: dict(one=1, two=2)"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "name",
+ "type",
+ "uri",
+ "metadata"
+ ],
+ "title": "JobArtifact"
+ },
+ "JobInfo": {
+ "type": "object",
+ "properties": {
+ "uuid": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string"
+ },
+ "status": {
+ "type": "string"
+ },
+ "scheduled_at": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "started_at": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "completed_at": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "artifacts": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/JobArtifact"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "uuid",
+ "type",
+ "status",
+ "artifacts"
+ ],
+ "title": "JobInfo"
+ },
"Model": {
"type": "object",
"properties": {
@@ -8157,6 +8379,22 @@
"title": "ListFileResponse",
"description": "Response representing a list of file entries."
},
+ "ListJobsResponse": {
+ "type": "object",
+ "properties": {
+ "data": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/JobInfo"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "data"
+ ],
+ "title": "ListJobsResponse"
+ },
"ListModelsResponse": {
"type": "object",
"properties": {
@@ -10119,6 +10357,9 @@
{
"name": "Inspect"
},
+ {
+ "name": "Jobs"
+ },
{
"name": "Models"
},
@@ -10169,6 +10410,7 @@
"Files (Coming Soon)",
"Inference",
"Inspect",
+ "Jobs",
"Models",
"PostTraining (Coming Soon)",
"Safety",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 4c00fbe63..8cc7779ea 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -142,6 +142,30 @@ paths:
schema:
$ref: '#/components/schemas/BatchCompletionRequest'
required: true
+ /v1/jobs/{job_id}/cancel:
+ post:
+ responses:
+ '200':
+ description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Jobs
+ description: ''
+ parameters:
+ - name: job_id
+ in: path
+ required: true
+ schema:
+ type: string
/v1/post-training/job/cancel:
post:
responses:
@@ -633,6 +657,57 @@ paths:
required: true
schema:
type: string
+ /v1/jobs/{job_id}:
+ get:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/JobInfo'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Jobs
+ description: ''
+ parameters:
+ - name: job_id
+ in: path
+ required: true
+ schema:
+ type: string
+ delete:
+ responses:
+ '200':
+ description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Jobs
+ description: ''
+ parameters:
+ - name: job_id
+ in: path
+ required: true
+ schema:
+ type: string
/v1/inference/embeddings:
post:
responses:
@@ -1731,6 +1806,29 @@ paths:
required: true
schema:
type: string
+ /v1/jobs:
+ get:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ListJobsResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Jobs
+ description: ''
+ parameters: []
/v1/models:
get:
responses:
@@ -3213,6 +3311,10 @@ components:
CompletionResponse:
type: object
properties:
+ metrics:
+ type: array
+ items:
+ $ref: '#/components/schemas/MetricEvent'
content:
type: string
description: The generated completion text
@@ -3531,6 +3633,10 @@ components:
CompletionResponseStreamChunk:
type: object
properties:
+ metrics:
+ type: array
+ items:
+ $ref: '#/components/schemas/MetricEvent'
delta:
type: string
description: >-
@@ -4901,6 +5007,60 @@ components:
required:
- type
title: UnionType
+ JobArtifact:
+ type: object
+ properties:
+ name:
+ type: string
+ type:
+ type: string
+ uri:
+ type: string
+ metadata:
+ type: object
+ title: dict
+ description: >-
+ dict() -> new empty dictionary dict(mapping) -> new dictionary initialized
+ from a mapping object's (key, value) pairs dict(iterable) -> new dictionary
+ initialized as if via: d = {} for k, v in iterable: d[k]
+ = v dict(**kwargs) -> new dictionary initialized with the name=value pairs in
+ the keyword argument list. For example: dict(one=1, two=2)
+ additionalProperties: false
+ required:
+ - name
+ - type
+ - uri
+ - metadata
+ title: JobArtifact
+ JobInfo:
+ type: object
+ properties:
+ uuid:
+ type: string
+ type:
+ type: string
+ status:
+ type: string
+ scheduled_at:
+ type: string
+ format: date-time
+ started_at:
+ type: string
+ format: date-time
+ completed_at:
+ type: string
+ format: date-time
+ artifacts:
+ type: array
+ items:
+ $ref: '#/components/schemas/JobArtifact'
+ additionalProperties: false
+ required:
+ - uuid
+ - type
+ - status
+ - artifacts
+ title: JobInfo
Model:
type: object
properties:
@@ -5562,6 +5722,17 @@ components:
title: ListFileResponse
description: >-
Response representing a list of file entries.
+ ListJobsResponse:
+ type: object
+ properties:
+ data:
+ type: array
+ items:
+ $ref: '#/components/schemas/JobInfo'
+ additionalProperties: false
+ required:
+ - data
+ title: ListJobsResponse
ListModelsResponse:
type: object
properties:
@@ -6818,6 +6989,7 @@ tags:
Llama Stack Inference API for generating completions, chat completions, and
embeddings.
- name: Inspect
+ - name: Jobs
- name: Models
- name: PostTraining (Coming Soon)
- name: Safety
@@ -6842,6 +7014,7 @@ x-tagGroups:
- Files (Coming Soon)
- Inference
- Inspect
+ - Jobs
- Models
- PostTraining (Coming Soon)
- Safety
diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py
index 842a2b63d..866974ee7 100644
--- a/llama_stack/apis/datatypes.py
+++ b/llama_stack/apis/datatypes.py
@@ -36,6 +36,7 @@ class Api(Enum):
# built-in API
inspect = "inspect"
+ jobs = "jobs"
@json_schema_type
diff --git a/llama_stack/apis/jobs/__init__.py b/llama_stack/apis/jobs/__init__.py
new file mode 100644
index 000000000..2bb4ff26c
--- /dev/null
+++ b/llama_stack/apis/jobs/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .jobs import * # noqa: F401 F403
diff --git a/llama_stack/apis/jobs/jobs.py b/llama_stack/apis/jobs/jobs.py
new file mode 100644
index 000000000..af4a03b67
--- /dev/null
+++ b/llama_stack/apis/jobs/jobs.py
@@ -0,0 +1,61 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from datetime import datetime
+from typing import List, Optional, Protocol, runtime_checkable
+
+from pydantic import BaseModel
+
+from llama_stack.schema_utils import json_schema_type, webmethod
+
+
+@json_schema_type
+class JobArtifact(BaseModel):
+ name: str
+ type: str
+ uri: str
+ metadata: dict
+
+
+@json_schema_type
+class JobInfo(BaseModel):
+ uuid: str
+ type: str
+ status: str
+
+ scheduled_at: Optional[datetime] = None
+ started_at: Optional[datetime] = None
+ completed_at: Optional[datetime] = None
+
+ artifacts: List[JobArtifact]
+
+
+class ListJobsResponse(BaseModel):
+ data: List[JobInfo]
+
+
+@runtime_checkable
+class Jobs(Protocol):
+ @webmethod(route="/jobs/{job_id}/cancel", method="POST")
+ async def cancel_job(
+ self,
+ job_id: str,
+ ) -> None: ...
+
+ @webmethod(route="/jobs/{job_id}", method="DELETE")
+ async def delete_job(
+ self,
+ job_id: str,
+ ) -> None: ...
+
+ @webmethod(route="/jobs", method="GET")
+ async def list_jobs(self) -> ListJobsResponse: ...
+
+ @webmethod(route="/jobs/{job_id}", method="GET")
+ async def get_job(
+ self,
+ job_id: str,
+ ) -> JobInfo: ...
diff --git a/llama_stack/apis/resource.py b/llama_stack/apis/resource.py
index 70ec63c55..a23f9d4fd 100644
--- a/llama_stack/apis/resource.py
+++ b/llama_stack/apis/resource.py
@@ -11,6 +11,7 @@ from pydantic import BaseModel, Field
class ResourceType(Enum):
model = "model"
+ job = "job"
shield = "shield"
vector_db = "vector_db"
dataset = "dataset"
diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py
index 308081415..e35d2e562 100644
--- a/llama_stack/distribution/distribution.py
+++ b/llama_stack/distribution/distribution.py
@@ -56,7 +56,7 @@ def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]:
def providable_apis() -> List[Api]:
routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()}
- return [api for api in Api if api not in routing_table_apis and api != Api.inspect]
+ return [api for api in Api if api not in routing_table_apis and api not in (Api.inspect, Api.jobs)]
def get_provider_registry() -> Dict[Api, Dict[str, ProviderSpec]]:
diff --git a/llama_stack/distribution/jobs.py b/llama_stack/distribution/jobs.py
new file mode 100644
index 000000000..3c6dcb1e3
--- /dev/null
+++ b/llama_stack/distribution/jobs.py
@@ -0,0 +1,48 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from pydantic import BaseModel
+
+from llama_stack.apis.jobs import (
+ JobInfo,
+ Jobs,
+ ListJobsResponse,
+)
+from llama_stack.distribution.datatypes import StackRunConfig
+
+
+class DistributionJobsConfig(BaseModel):
+ run_config: StackRunConfig
+
+
+async def get_provider_impl(config, deps):
+ impl = DistributionJobsImpl(config, deps)
+ await impl.initialize()
+ return impl
+
+
+class DistributionJobsImpl(Jobs):
+ def __init__(self, config, deps):
+ self.config = config
+ self.deps = deps
+
+ async def initialize(self) -> None:
+ pass
+
+ async def shutdown(self) -> None:
+ pass
+
+ async def list_jobs(self) -> ListJobsResponse:
+ raise NotImplementedError
+
+ async def delete_job(self, job_id: str) -> None:
+ raise NotImplementedError
+
+ async def cancel_job(self, job_id: str) -> None:
+ raise NotImplementedError
+
+ async def get_job(self, job_id: str) -> JobInfo:
+ raise NotImplementedError
diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py
index ab075f399..abbad9ae1 100644
--- a/llama_stack/distribution/resolver.py
+++ b/llama_stack/distribution/resolver.py
@@ -14,6 +14,7 @@ from llama_stack.apis.datasets import Datasets
from llama_stack.apis.eval import Eval
from llama_stack.apis.inference import Inference
from llama_stack.apis.inspect import Inspect
+from llama_stack.apis.jobs import Jobs
from llama_stack.apis.models import Models
from llama_stack.apis.post_training import PostTraining
from llama_stack.apis.safety import Safety
@@ -62,6 +63,7 @@ def api_protocol_map() -> Dict[Api, Any]:
Api.agents: Agents,
Api.inference: Inference,
Api.inspect: Inspect,
+ Api.jobs: Jobs,
Api.vector_io: VectorIO,
Api.vector_dbs: VectorDBs,
Api.models: Models,
@@ -226,26 +228,32 @@ def sort_providers_by_deps(
{k: list(v.values()) for k, v in providers_with_specs.items()}
)
- # Append built-in "inspect" provider
+ # Append built-in providers
apis = [x[1].spec.api for x in sorted_providers]
- sorted_providers.append(
+ deps = [x.value for x in apis]
+ config = run_config.model_dump()
+ sorted_providers += [
(
- "inspect",
+ name,
ProviderWithSpec(
provider_id="__builtin__",
provider_type="__builtin__",
- config={"run_config": run_config.model_dump()},
+ config={"run_config": config},
spec=InlineProviderSpec(
- api=Api.inspect,
+ api=api,
provider_type="__builtin__",
- config_class="llama_stack.distribution.inspect.DistributionInspectConfig",
- module="llama_stack.distribution.inspect",
+ config_class=f"llama_stack.distribution.{name}.Distribution{name.title()}Config",
+ module=f"llama_stack.distribution.{name}",
api_dependencies=apis,
- deps__=[x.value for x in apis],
+ deps__=deps,
),
),
)
- )
+ for name, api in [
+ ("inspect", Api.inspect),
+ ("jobs", Api.jobs),
+ ]
+ ]
logger.debug(f"Resolved {len(sorted_providers)} providers")
for api_str, provider in sorted_providers:
diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py
index 7ca009b13..d438a1559 100644
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@@ -367,7 +367,9 @@ def main():
continue
apis_to_serve.add(inf.routing_table_api.value)
- apis_to_serve.add("inspect")
+ # also include builtin APIs
+    apis_to_serve.update({"inspect", "jobs"})
+
for api_str in apis_to_serve:
api = Api(api_str)
diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py
index 2b974739a..4a6fce62e 100644
--- a/llama_stack/distribution/stack.py
+++ b/llama_stack/distribution/stack.py
@@ -21,6 +21,7 @@ from llama_stack.apis.eval import Eval
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference
from llama_stack.apis.inspect import Inspect
+from llama_stack.apis.jobs import Jobs
from llama_stack.apis.models import Models
from llama_stack.apis.post_training import PostTraining
from llama_stack.apis.safety import Safety
@@ -62,6 +63,7 @@ class LlamaStack(
Models,
Shields,
Inspect,
+ Jobs,
ToolGroups,
ToolRuntime,
RAGToolRuntime,
diff --git a/pyproject.toml b/pyproject.toml
index 055fa7a55..80afaaef4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -168,6 +168,7 @@ exclude = [
"^llama_stack/apis/files/files\\.py$",
"^llama_stack/apis/inference/inference\\.py$",
"^llama_stack/apis/inspect/inspect\\.py$",
+ "^llama_stack/apis/jobs/jobs\\.py$",
"^llama_stack/apis/models/models\\.py$",
"^llama_stack/apis/post_training/post_training\\.py$",
"^llama_stack/apis/resource\\.py$",