chore: remove /v1/inference/completion and implementations (#3622)

# What does this PR do?

the /inference/completion route is gone. this removes the
implementations.

## Test Plan

ci
This commit is contained in:
Matthew Farrellee 2025-10-01 11:36:53 -04:00 committed by GitHub
parent ea15f2a270
commit f7c5ef4ec0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
75 changed files with 16141 additions and 17056 deletions

View file

@ -9,7 +9,6 @@ from typing import Any
from llama_stack_client import AsyncLlamaStackClient
from llama_stack.apis.common.content_types import InterleavedContent
from llama_stack.apis.inference import (
ChatCompletionResponse,
ChatCompletionResponseStreamChunk,
@ -86,37 +85,6 @@ class PassthroughInferenceAdapter(Inference):
provider_data=provider_data,
)
async def completion(
self,
model_id: str,
content: InterleavedContent,
sampling_params: SamplingParams | None = None,
response_format: ResponseFormat | None = None,
stream: bool | None = False,
logprobs: LogProbConfig | None = None,
) -> AsyncGenerator:
if sampling_params is None:
sampling_params = SamplingParams()
client = self._get_client()
model = await self.model_store.get_model(model_id)
request_params = {
"model_id": model.provider_resource_id,
"content": content,
"sampling_params": sampling_params,
"response_format": response_format,
"stream": stream,
"logprobs": logprobs,
}
request_params = {key: value for key, value in request_params.items() if value is not None}
# cast everything to json dict
json_params = self.cast_value_to_json_dict(request_params)
# only pass through the not None params
return await client.inference.completion(**json_params)
async def chat_completion(
self,
model_id: str,