chore: remove /v1/inference/completion and implementations

Matthew Farrellee 2025-09-30 03:46:07 -04:00
parent 606f4cf281
commit 4b641d7127
78 changed files with 16143 additions and 17755 deletions

@@ -4,14 +4,11 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from collections.abc import AsyncGenerator, AsyncIterator
+from collections.abc import AsyncIterator
 from typing import Any

 import litellm

-from llama_stack.apis.common.content_types import (
-    InterleavedContent,
-)
 from llama_stack.apis.inference import (
     ChatCompletionRequest,
     ChatCompletionResponse,
@@ -108,17 +105,6 @@ class LiteLLMOpenAIMixin(
             else model_id
         )
-
-    async def completion(
-        self,
-        model_id: str,
-        content: InterleavedContent,
-        sampling_params: SamplingParams | None = None,
-        response_format: ResponseFormat | None = None,
-        stream: bool | None = False,
-        logprobs: LogProbConfig | None = None,
-    ) -> AsyncGenerator:
-        raise NotImplementedError("LiteLLM does not support completion requests")

     async def chat_completion(
         self,
         model_id: str,
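
The import hunk drops AsyncGenerator and InterleavedContent because the removed completion method was their only consumer in this module; the second hunk shows that chat_completion remains on the mixin. For downstream callers of the deleted /v1/inference/completion path, here is a minimal migration sketch, assuming the UserMessage type and the messages= parameter of chat_completion in llama_stack.apis.inference are unchanged (neither is confirmed by this diff):

# Hypothetical migration helper, not part of this commit: route a raw prompt
# through the retained chat_completion by wrapping it in a single user message.
from llama_stack.apis.inference import Inference, UserMessage


async def complete_via_chat(inference: Inference, model_id: str, prompt: str):
    # The legacy raw-prompt endpoint is gone; send the prompt as a chat turn.
    return await inference.chat_completion(
        model_id=model_id,
        messages=[UserMessage(content=prompt)],
    )

The OpenAI-compatible completions path (openai_completion) may be the intended long-term replacement, but that is an assumption this diff does not show.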