docs: API docstrings cleanup for better documentation rendering (#3661)

# What does this PR do? * Cleans up API docstrings for better documentation rendering <img width="2346" height="1126" alt="image" src="https://github.com/user-attachments/assets/516b09a1-2d5b-4614-a3a9-13431fc21fc1" /> ## Test Plan * Manual testing --------- Signed-off-by: Doug Edgar <dedgar@redhat.com> Signed-off-by: Charlie Doern <cdoern@redhat.com> Signed-off-by: Francisco Javier Arceo <farceo@redhat.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: ehhuang <ehhuang@users.noreply.github.com> Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com> Co-authored-by: Matthew Farrellee <matt@cs.wisc.edu> Co-authored-by: Doug Edgar <dedgar@redhat.com> Co-authored-by: Christian Zaccaria <73656840+ChristianZaccaria@users.noreply.github.com> Co-authored-by: Anastas Stoyanovsky <contact@anastas.eu> Co-authored-by: Charlie Doern <cdoern@redhat.com> Co-authored-by: Francisco Arceo <arceofrancisco@gmail.com> Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: Young Han <110819238+seyeong-han@users.noreply.github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-07 18:57:21 +00:00 · 2025-10-06 10:46:33 -07:00 · 2025-10-06 10:46:33 -07:00 · a8da6ba3a7
commit a8da6ba3a7
parent 892ea759fa
17 changed files with 611 additions and 411 deletions
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@ -1053,7 +1053,9 @@ class InferenceProvider(Protocol):
        # for fill-in-the-middle type completion
        suffix: str | None = None,
    ) -> OpenAICompletion:
-        """Generate an OpenAI-compatible completion for the given prompt using the specified model.
+        """Create completion.
+
+        Generate an OpenAI-compatible completion for the given prompt using the specified model.

        :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
        :param prompt: The prompt to generate a completion for.
@ -1105,7 +1107,9 @@ class InferenceProvider(Protocol):
        top_p: float | None = None,
        user: str | None = None,
    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        """Generate an OpenAI-compatible chat completion for the given messages using the specified model.
+        """Create chat completions.
+
+        Generate an OpenAI-compatible chat completion for the given messages using the specified model.

        :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
        :param messages: List of messages in the conversation.
@ -1144,7 +1148,9 @@ class InferenceProvider(Protocol):
        dimensions: int | None = None,
        user: str | None = None,
    ) -> OpenAIEmbeddingsResponse:
-        """Generate OpenAI-compatible embeddings for the given input using the specified model.
+        """Create embeddings.
+
+        Generate OpenAI-compatible embeddings for the given input using the specified model.

        :param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint.
        :param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings.
@ -1157,7 +1163,9 @@ class InferenceProvider(Protocol):


 class Inference(InferenceProvider):
-    """Llama Stack Inference API for generating completions, chat completions, and embeddings.
+    """Inference
+
+    Llama Stack Inference API for generating completions, chat completions, and embeddings.

    This API provides the raw interface to the underlying models. Two kinds of models are supported:
    - LLM models: these models generate "raw" and "chat" (conversational) completions.
@ -1173,7 +1181,7 @@ class Inference(InferenceProvider):
        model: str | None = None,
        order: Order | None = Order.desc,
    ) -> ListOpenAIChatCompletionResponse:
-        """List all chat completions.
+        """List chat completions.

        :param after: The ID of the last chat completion to return.
        :param limit: The maximum number of chat completions to return.
@ -1188,7 +1196,9 @@ class Inference(InferenceProvider):
    )
    @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
-        """Describe a chat completion by its ID.
+        """Get chat completion.
+
+        Describe a chat completion by its ID.

        :param completion_id: ID of the chat completion.
        :returns: A OpenAICompletionWithInputMessages.