chore(apis): unpublish deprecated /v1/inference apis (#3297)

# What does this PR do?

Unpublish (make unavailable to users) the following APIs:
 - `/v1/inference/completion`, replaced by `/v1/openai/v1/completions`
 - `/v1/inference/chat-completion`, replaced by `/v1/openai/v1/chat/completions`
 - `/v1/inference/embeddings`, replaced by `/v1/openai/v1/embeddings`
 - `/v1/inference/batch-completion`, replaced by `/v1/openai/v1/batches`
 - `/v1/inference/batch-chat-completion`, replaced by `/v1/openai/v1/batches`

Note: the implementations are still available for internal use; for example,
agents use chat-completion.
This commit is contained in:
Matthew Farrellee 2025-09-27 14:20:06 -04:00 committed by GitHub
parent 60484c5c4e
commit 53b15725b6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 3134 additions and 1347 deletions

View file

@ -1008,7 +1008,6 @@ class InferenceProvider(Protocol):
model_store: ModelStore | None = None
@webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1)
async def completion(
self,
model_id: str,

View file

@ -192,6 +192,14 @@ async def localize_image_content(uri: str) -> tuple[bytes, str] | None:
format = "png"
return content, format
elif uri.startswith("data"):
# data:image/{format};base64,{data}
match = re.match(r"data:image/(\w+);base64,(.+)", uri)
if not match:
raise ValueError(f"Invalid data URL format, {uri[:40]}...")
fmt, image_data = match.groups()
content = base64.b64decode(image_data)
return content, fmt
else:
return None