diff --git a/docs/source/references/index.md b/docs/source/references/index.md
index 85b1ad75a..2a5b0889e 100644
--- a/docs/source/references/index.md
+++ b/docs/source/references/index.md
@@ -1,15 +1,17 @@
# References
- [API Reference](api_reference/index) for the Llama Stack API specification
+- [Python SDK Reference](python_sdk_reference/index) for the llama-stack-client Python SDK
- [Llama CLI](llama_cli_reference/index) for building and running your Llama Stack server
-- [Llama Stack Client CLI](llama_stack_client_cli_reference/index) for interacting with your Llama Stack server
+- [Llama Stack Client CLI](llama_stack_client_cli_reference) for interacting with your Llama Stack server
```{toctree}
-:maxdepth: 2
+:maxdepth: 1
:hidden:
api_reference/index
+python_sdk_reference/index
llama_cli_reference/index
-llama_stack_client_cli_reference/index
+llama_stack_client_cli_reference
llama_cli_reference/download_models
```
diff --git a/docs/source/references/llama_stack_client_cli_reference/index.md b/docs/source/references/llama_stack_client_cli_reference.md
similarity index 100%
rename from docs/source/references/llama_stack_client_cli_reference/index.md
rename to docs/source/references/llama_stack_client_cli_reference.md
diff --git a/docs/source/references/python_sdk_reference/index.md b/docs/source/references/python_sdk_reference/index.md
new file mode 100644
index 000000000..8ee0375a5
--- /dev/null
+++ b/docs/source/references/python_sdk_reference/index.md
@@ -0,0 +1,348 @@
+# Python SDK Reference
+
+## Shared Types
+
+```python
+from llama_stack_client.types import (
+ Attachment,
+ BatchCompletion,
+ CompletionMessage,
+ SamplingParams,
+ SystemMessage,
+ ToolCall,
+ ToolResponseMessage,
+ UserMessage,
+)
+```
+
+## Telemetry
+
+Types:
+
+```python
+from llama_stack_client.types import TelemetryGetTraceResponse
+```
+
+Methods:
+
+- client.telemetry.get_trace(\*\*params) -> TelemetryGetTraceResponse
+- client.telemetry.log(\*\*params) -> None
+
+## Agents
+
+Types:
+
+```python
+from llama_stack_client.types import (
+ InferenceStep,
+ MemoryRetrievalStep,
+ RestAPIExecutionConfig,
+ ShieldCallStep,
+ ToolExecutionStep,
+ ToolParamDefinition,
+ AgentCreateResponse,
+)
+```
+
+Methods:
+
+- client.agents.create(\*\*params) -> AgentCreateResponse
+- client.agents.delete(\*\*params) -> None
+
+### Sessions
+
+Types:
+
+```python
+from llama_stack_client.types.agents import Session, SessionCreateResponse
+```
+
+Methods:
+
+- client.agents.sessions.create(\*\*params) -> SessionCreateResponse
+- client.agents.sessions.retrieve(\*\*params) -> Session
+- client.agents.sessions.delete(\*\*params) -> None
+
+### Steps
+
+Types:
+
+```python
+from llama_stack_client.types.agents import AgentsStep
+```
+
+Methods:
+
+- client.agents.steps.retrieve(\*\*params) -> AgentsStep
+
+### Turns
+
+Types:
+
+```python
+from llama_stack_client.types.agents import AgentsTurnStreamChunk, Turn, TurnStreamEvent
+```
+
+Methods:
+
+- client.agents.turns.create(\*\*params) -> AgentsTurnStreamChunk
+- client.agents.turns.retrieve(\*\*params) -> Turn
+
+## Datasets
+
+Types:
+
+```python
+from llama_stack_client.types import TrainEvalDataset
+```
+
+Methods:
+
+- client.datasets.create(\*\*params) -> None
+- client.datasets.delete(\*\*params) -> None
+- client.datasets.get(\*\*params) -> TrainEvalDataset
+
+## Evaluate
+
+Types:
+
+```python
+from llama_stack_client.types import EvaluationJob
+```
+
+### Jobs
+
+Types:
+
+```python
+from llama_stack_client.types.evaluate import (
+ EvaluationJobArtifacts,
+ EvaluationJobLogStream,
+ EvaluationJobStatus,
+)
+```
+
+Methods:
+
+- client.evaluate.jobs.list() -> EvaluationJob
+- client.evaluate.jobs.cancel(\*\*params) -> None
+
+#### Artifacts
+
+Methods:
+
+- client.evaluate.jobs.artifacts.list(\*\*params) -> EvaluationJobArtifacts
+
+#### Logs
+
+Methods:
+
+- client.evaluate.jobs.logs.list(\*\*params) -> EvaluationJobLogStream
+
+#### Status
+
+Methods:
+
+- client.evaluate.jobs.status.list(\*\*params) -> EvaluationJobStatus
+
+### QuestionAnswering
+
+Methods:
+
+- client.evaluate.question_answering.create(\*\*params) -> EvaluationJob
+
+## Evaluations
+
+Methods:
+
+- client.evaluations.summarization(\*\*params) -> EvaluationJob
+- client.evaluations.text_generation(\*\*params) -> EvaluationJob
+
+## Inference
+
+Types:
+
+```python
+from llama_stack_client.types import (
+ ChatCompletionStreamChunk,
+ CompletionStreamChunk,
+ TokenLogProbs,
+ InferenceChatCompletionResponse,
+ InferenceCompletionResponse,
+)
+```
+
+Methods:
+
+- client.inference.chat_completion(\*\*params) -> InferenceChatCompletionResponse
+- client.inference.completion(\*\*params) -> InferenceCompletionResponse
+
+### Embeddings
+
+Types:
+
+```python
+from llama_stack_client.types.inference import Embeddings
+```
+
+Methods:
+
+- client.inference.embeddings.create(\*\*params) -> Embeddings
+
+## Safety
+
+Types:
+
+```python
+from llama_stack_client.types import RunSheidResponse
+```
+
+Methods:
+
+- client.safety.run_shield(\*\*params) -> RunSheidResponse
+
+## Memory
+
+Types:
+
+```python
+from llama_stack_client.types import (
+ QueryDocuments,
+ MemoryCreateResponse,
+ MemoryRetrieveResponse,
+ MemoryListResponse,
+ MemoryDropResponse,
+)
+```
+
+Methods:
+
+- client.memory.create(\*\*params) -> object
+- client.memory.retrieve(\*\*params) -> object
+- client.memory.update(\*\*params) -> None
+- client.memory.list() -> object
+- client.memory.drop(\*\*params) -> str
+- client.memory.insert(\*\*params) -> None
+- client.memory.query(\*\*params) -> QueryDocuments
+
+### Documents
+
+Types:
+
+```python
+from llama_stack_client.types.memory import DocumentRetrieveResponse
+```
+
+Methods:
+
+- client.memory.documents.retrieve(\*\*params) -> DocumentRetrieveResponse
+- client.memory.documents.delete(\*\*params) -> None
+
+## PostTraining
+
+Types:
+
+```python
+from llama_stack_client.types import PostTrainingJob
+```
+
+Methods:
+
+- client.post_training.preference_optimize(\*\*params) -> PostTrainingJob
+- client.post_training.supervised_fine_tune(\*\*params) -> PostTrainingJob
+
+### Jobs
+
+Types:
+
+```python
+from llama_stack_client.types.post_training import (
+ PostTrainingJobArtifacts,
+ PostTrainingJobLogStream,
+ PostTrainingJobStatus,
+)
+```
+
+Methods:
+
+- client.post_training.jobs.list() -> PostTrainingJob
+- client.post_training.jobs.artifacts(\*\*params) -> PostTrainingJobArtifacts
+- client.post_training.jobs.cancel(\*\*params) -> None
+- client.post_training.jobs.logs(\*\*params) -> PostTrainingJobLogStream
+- client.post_training.jobs.status(\*\*params) -> PostTrainingJobStatus
+
+## RewardScoring
+
+Types:
+
+```python
+from llama_stack_client.types import RewardScoring, ScoredDialogGenerations
+```
+
+Methods:
+
+- client.reward_scoring.score(\*\*params) -> RewardScoring
+
+## SyntheticDataGeneration
+
+Types:
+
+```python
+from llama_stack_client.types import SyntheticDataGeneration
+```
+
+Methods:
+
+- client.synthetic_data_generation.generate(\*\*params) -> SyntheticDataGeneration
+
+## BatchInference
+
+Types:
+
+```python
+from llama_stack_client.types import BatchChatCompletion
+```
+
+Methods:
+
+- client.batch_inference.chat_completion(\*\*params) -> BatchChatCompletion
+- client.batch_inference.completion(\*\*params) -> BatchCompletion
+
+## Models
+
+Types:
+
+```python
+from llama_stack_client.types import ModelServingSpec
+```
+
+Methods:
+
+- client.models.list() -> ModelServingSpec
+- client.models.get(\*\*params) -> Optional
+
+## MemoryBanks
+
+Types:
+
+```python
+from llama_stack_client.types import MemoryBankSpec
+```
+
+Methods:
+
+- client.memory_banks.list() -> MemoryBankSpec
+- client.memory_banks.get(\*\*params) -> Optional
+
+## Shields
+
+Types:
+
+```python
+from llama_stack_client.types import ShieldSpec
+```
+
+Methods:
+
+- client.shields.list() -> ShieldSpec
+- client.shields.get(\*\*params) -> Optional