chore: remove /v1/inference/completion and implementations

Matthew Farrellee 2025-09-30 03:46:07 -04:00
parent 606f4cf281
commit 4b641d7127
78 changed files with 16143 additions and 17755 deletions

@@ -4,14 +4,11 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from collections.abc import AsyncGenerator, AsyncIterator
+from collections.abc import AsyncIterator
 from typing import Any

 import litellm

-from llama_stack.apis.common.content_types import (
-    InterleavedContent,
-)
 from llama_stack.apis.inference import (
     ChatCompletionRequest,
     ChatCompletionResponse,
@@ -108,17 +105,6 @@ class LiteLLMOpenAIMixin(
             else model_id
         )
-
-    async def completion(
-        self,
-        model_id: str,
-        content: InterleavedContent,
-        sampling_params: SamplingParams | None = None,
-        response_format: ResponseFormat | None = None,
-        stream: bool | None = False,
-        logprobs: LogProbConfig | None = None,
-    ) -> AsyncGenerator:
-        raise NotImplementedError("LiteLLM does not support completion requests")

     async def chat_completion(
         self,
         model_id: str,
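
The import hunk drops AsyncGenerator and InterleavedContent because the removed completion method was their only consumer in this module; the second hunk shows that chat_completion remains on the mixin. For downstream callers of the deleted /v1/inference/completion path, here is a minimal migration sketch, assuming the UserMessage type and the messages= parameter of chat_completion in llama_stack.apis.inference are unchanged (neither is confirmed by this diff):

# Hypothetical migration helper, not part of this commit: route a raw prompt
# through the retained chat_completion by wrapping it in a single user message.
from llama_stack.apis.inference import Inference, UserMessage


async def complete_via_chat(inference: Inference, model_id: str, prompt: str):
    # The legacy raw-prompt endpoint is gone; send the prompt as a chat turn.
    return await inference.chat_completion(
        model_id=model_id,
        messages=[UserMessage(content=prompt)],
    )

The OpenAI-compatible completions path (openai_completion) may be the intended long-term replacement, but that is an assumption this diff does not show.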