chore: remove /v1/inference/completion and implementations (#3622)

# What does this PR do?

The /inference/completion route is gone. This removes the implementations.

## Test Plan

CI
2025-12-03 18:00:36 +00:00 · 2025-10-01 11:36:53 -04:00 · 2025-10-01 11:36:53 -04:00 · f7c5ef4ec0
commit f7c5ef4ec0
parent ea15f2a270
75 changed files with 16141 additions and 17056 deletions
--- a/llama_stack/providers/remote/inference/passthrough/passthrough.py
+++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py
@@ -9,7 +9,6 @@ from typing import Any

 from llama_stack_client import AsyncLlamaStackClient

-from llama_stack.apis.common.content_types import InterleavedContent
 from llama_stack.apis.inference import (
    ChatCompletionResponse,
    ChatCompletionResponseStreamChunk,
@@ -86,37 +85,6 @@ class PassthroughInferenceAdapter(Inference):
            provider_data=provider_data,
        )

-    async def completion(
-        self,
-        model_id: str,
-        content: InterleavedContent,
-        sampling_params: SamplingParams | None = None,
-        response_format: ResponseFormat | None = None,
-        stream: bool | None = False,
-        logprobs: LogProbConfig | None = None,
-    ) -> AsyncGenerator:
-        if sampling_params is None:
-            sampling_params = SamplingParams()
-        client = self._get_client()
-        model = await self.model_store.get_model(model_id)
-
-        request_params = {
-            "model_id": model.provider_resource_id,
-            "content": content,
-            "sampling_params": sampling_params,
-            "response_format": response_format,
-            "stream": stream,
-            "logprobs": logprobs,
-        }
-
-        request_params = {key: value for key, value in request_params.items() if value is not None}
-
-        # cast everything to json dict
-        json_params = self.cast_value_to_json_dict(request_params)
-
-        # only pass through the not None params
-        return await client.inference.completion(**json_params)
-
    async def chat_completion(
        self,
        model_id: str,