chore(apis): unpublish deprecated /v1/inference apis

2025-10-05 04:17:32 +00:00 · 2025-01-09 02:03:04 -05:00 · 2025-01-09 02:03:04 -05:00 · 26f4f3fe14
commit 26f4f3fe14
parent 478b4ff1e6
6 changed files with 1286 additions and 3770 deletions
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -1026,7 +1026,6 @@ class InferenceProvider(Protocol):

    model_store: ModelStore | None = None

-    @webmethod(route="/inference/completion", method="POST")
    async def completion(
        self,
        model_id: str,
@@ -1049,7 +1048,6 @@ class InferenceProvider(Protocol):
        """
        ...

-    @webmethod(route="/inference/batch-completion", method="POST", experimental=True)
    async def batch_completion(
        self,
        model_id: str,
@@ -1070,7 +1068,6 @@ class InferenceProvider(Protocol):
        raise NotImplementedError("Batch completion is not implemented")
        return  # this is so mypy's safe-super rule will consider the method concrete

-    @webmethod(route="/inference/chat-completion", method="POST")
    async def chat_completion(
        self,
        model_id: str,
@@ -1110,7 +1107,6 @@ class InferenceProvider(Protocol):
        """
        ...

-    @webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True)
    async def batch_chat_completion(
        self,
        model_id: str,
@@ -1135,7 +1131,6 @@ class InferenceProvider(Protocol):
        raise NotImplementedError("Batch chat completion is not implemented")
        return  # this is so mypy's safe-super rule will consider the method concrete

-    @webmethod(route="/inference/embeddings", method="POST")
    async def embeddings(
        self,
        model_id: str,