chore(apis): unpublish deprecated /v1/inference apis (#3297)

# What does this PR do? unpublish (make unavailable to users) the following apis - - `/v1/inference/completion`, replaced by `/v1/openai/v1/completions` - `/v1/inference/chat-completion`, replaced by `/v1/openai/v1/chat/completions` - `/v1/inference/embeddings`, replaced by `/v1/openai/v1/embeddings` - `/v1/inference/batch-completion`, replaced by `/v1/openai/v1/batches` - `/v1/inference/batch-chat-completion`, replaced by `/v1/openai/v1/batches` note: the implementations are still available for internal use, e.g. agents uses chat-completion.
2025-12-08 11:07:22 +00:00 · 2025-09-27 14:20:06 -04:00 · 2025-09-27 14:20:06 -04:00 · 53b15725b6
commit 53b15725b6
parent 60484c5c4e
23 changed files with 3134 additions and 1347 deletions
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@ -1008,7 +1008,6 @@ class InferenceProvider(Protocol):

    model_store: ModelStore | None = None

-    @webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1)
    async def completion(
        self,
        model_id: str,
--- a/llama_stack/providers/utils/inference/prompt_adapter.py
+++ b/llama_stack/providers/utils/inference/prompt_adapter.py
@ -192,6 +192,14 @@ async def localize_image_content(uri: str) -> tuple[bytes, str] | None:
                format = "png"

        return content, format
+    elif uri.startswith("data"):
+        # data:image/{format};base64,{data}
+        match = re.match(r"data:image/(\w+);base64,(.+)", uri)
+        if not match:
+            raise ValueError(f"Invalid data URL format, {uri[:40]}...")
+        fmt, image_data = match.groups()
+        content = base64.b64decode(image_data)
+        return content, fmt
    else:
        return None