docs: Improvements on provider_codegen for type hints and multi-line yaml descriptions (#4033)

# What does this PR do?

This PR improves type hint cleanup in auto-generated provider
documentation by adding regex logic.

**Issues Fixed:**
- Type hints with missing closing brackets (e.g., `list[str` instead of
`list[str]`)
- Types showing as `<class 'bool'>`, `<class 'str'>` instead of `bool`,
`str`
- The multi-line YAML frontmatter in index documentation files wasn't
ideal, so we now add the proper `|` character.

**Changes:**
1. Replaced string replacement (`.replace`) with regex-based type
cleaning to preserve the trailing bracket in case of `list` and `dict`.
2. Adds the `|` character for multi-line YAML descriptions.
3. I have regenerated the docs. However, let me know if that's not
needed.

## Test Plan

1. Ran uv run python scripts/provider_codegen.py - successfully
regenerated all docs
2. We can see that the updated docs handle correctly type hint cleanup
and multi-line yaml descriptions have now the `|` character.

### Note to the reviewer(s)

This is my first contribution to your lovely repo! Initially I was going
thourgh docs (wanted to use `remote::gemini` as provider) and realized
the issue. I've read the
[CONTRIBUTING.md](https://github.com/llamastack/llama-stack/blob/main/CONTRIBUTING.md)
and decided to open the PR. Let me know if there's anything I did wrong
and I'll update my PR!

---------

Signed-off-by: thepetk <thepetk@gmail.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
This commit is contained in:
Theofanis Petkos 2025-11-17 20:35:28 +00:00 committed by GitHub
parent fe91d331ef
commit 5fe6098350
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
65 changed files with 241 additions and 197 deletions

View file

@ -8,7 +8,8 @@
import subprocess
import sys
from pathlib import Path
from typing import Any
from types import UnionType
from typing import Annotated, Any, Union, get_args, get_origin
from pydantic_core import PydanticUndefined
from rich.progress import Progress, SpinnerColumn, TextColumn
@ -51,6 +52,41 @@ class ChangedPathTracker:
return self._changed_paths
def extract_type_annotation(annotation: Any) -> str:
"""extract a type annotation into a clean string representation."""
if annotation is None:
return "Any"
if annotation is type(None):
return "None"
origin = get_origin(annotation)
args = get_args(annotation)
# recursive workaround for Annotated types to ignore FieldInfo part
if origin is Annotated and args:
return extract_type_annotation(args[0])
if origin in [Union, UnionType]:
non_none_args = [arg for arg in args if arg is not type(None)]
has_none = len(non_none_args) < len(args)
if len(non_none_args) == 1:
formatted = extract_type_annotation(non_none_args[0])
return f"{formatted} | None" if has_none else formatted
else:
formatted_args = [extract_type_annotation(arg) for arg in non_none_args]
result = " | ".join(formatted_args)
return f"{result} | None" if has_none else result
if origin is not None and args:
origin_name = getattr(origin, "__name__", str(origin))
formatted_args = [extract_type_annotation(arg) for arg in args]
return f"{origin_name}[{', '.join(formatted_args)}]"
return annotation.__name__ if hasattr(annotation, "__name__") else str(annotation)
def get_config_class_info(config_class_path: str) -> dict[str, Any]:
"""Extract configuration information from a config class."""
try:
@ -78,14 +114,8 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]:
for field_name, field in config_class.model_fields.items():
if getattr(field, "exclude", False):
continue
field_type = str(field.annotation) if field.annotation else "Any"
# this string replace is ridiculous
field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "")
field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "")
field_type = field_type.replace("llama_stack_api.inference.", "")
field_type = field_type.replace("llama_stack.providers.", "")
field_type = field_type.replace("llama_stack_api.datatypes.", "")
field_type = extract_type_annotation(field.annotation)
default_value = field.default
if field.default_factory is not None:
@ -345,8 +375,16 @@ def generate_index_docs(api_name: str, api_docstring: str | None, provider_entri
# Add YAML frontmatter for index
md_lines.append("---")
if api_docstring:
clean_desc = api_docstring.strip().replace('"', '\\"')
md_lines.append(f'description: "{clean_desc}"')
# Handle multi-line descriptions in YAML
if "\n" in api_docstring.strip():
md_lines.append("description: |")
for line in api_docstring.strip().split("\n"):
# Avoid trailing whitespace by only adding spaces to non-empty lines
md_lines.append(f" {line}" if line.strip() else "")
else:
# For single line descriptions, format properly for YAML
clean_desc = api_docstring.strip().replace('"', '\\"')
md_lines.append(f'description: "{clean_desc}"')
md_lines.append(f"sidebar_label: {sidebar_label}")
md_lines.append(f"title: {api_name.title()}")
md_lines.append("---")