Revert "docs: API docstrings cleanup for better documentation rendering (#3661)"

This reverts commit a8da6ba3a7.
This commit is contained in:
Francisco Arceo 2025-10-07 10:18:47 -04:00 committed by GitHub
parent 6389bf5ffb
commit 1b7e893cb5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 414 additions and 614 deletions

View file

@@ -1053,9 +1053,7 @@ class InferenceProvider(Protocol):
# for fill-in-the-middle type completion
suffix: str | None = None,
) -> OpenAICompletion:
-"""Create completion.
-
-Generate an OpenAI-compatible completion for the given prompt using the specified model.
+"""Generate an OpenAI-compatible completion for the given prompt using the specified model.
:param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
:param prompt: The prompt to generate a completion for.
@@ -1107,9 +1105,7 @@ class InferenceProvider(Protocol):
top_p: float | None = None,
user: str | None = None,
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-"""Create chat completions.
-
-Generate an OpenAI-compatible chat completion for the given messages using the specified model.
+"""Generate an OpenAI-compatible chat completion for the given messages using the specified model.
:param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
:param messages: List of messages in the conversation.
@@ -1148,9 +1144,7 @@ class InferenceProvider(Protocol):
dimensions: int | None = None,
user: str | None = None,
) -> OpenAIEmbeddingsResponse:
-"""Create embeddings.
-
-Generate OpenAI-compatible embeddings for the given input using the specified model.
+"""Generate OpenAI-compatible embeddings for the given input using the specified model.
:param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint.
:param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings.
@@ -1163,9 +1157,7 @@ class InferenceProvider(Protocol):
class Inference(InferenceProvider):
-"""Inference
-
-Llama Stack Inference API for generating completions, chat completions, and embeddings.
+"""Llama Stack Inference API for generating completions, chat completions, and embeddings.
This API provides the raw interface to the underlying models. Two kinds of models are supported:
- LLM models: these models generate "raw" and "chat" (conversational) completions.
@@ -1181,7 +1173,7 @@ class Inference(InferenceProvider):
model: str | None = None,
order: Order | None = Order.desc,
) -> ListOpenAIChatCompletionResponse:
-"""List chat completions.
+"""List all chat completions.
:param after: The ID of the last chat completion to return.
:param limit: The maximum number of chat completions to return.
@@ -1196,9 +1188,7 @@ class Inference(InferenceProvider):
)
@webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
-"""Get chat completion.
-
-Describe a chat completion by its ID.
+"""Describe a chat completion by its ID.
:param completion_id: ID of the chat completion.
:returns: A OpenAICompletionWithInputMessages.