chore(apis): unpublish deprecated /v1/inference apis (#3297)

# What does this PR do?

Unpublish (make unavailable to users) the following APIs:

- `/v1/inference/completion`, replaced by `/v1/openai/v1/completions`
- `/v1/inference/chat-completion`, replaced by `/v1/openai/v1/chat/completions`
- `/v1/inference/embeddings`, replaced by `/v1/openai/v1/embeddings`
- `/v1/inference/batch-completion`, replaced by `/v1/openai/v1/batches`
- `/v1/inference/batch-chat-completion`, replaced by `/v1/openai/v1/batches`

Note: the implementations are still available for internal use, e.g. agents uses chat-completion.
2025-12-04 18:13:44 +00:00 · 2025-09-27 14:20:06 -04:00 · 2025-09-27 14:20:06 -04:00 · 53b15725b6
commit 53b15725b6
parent 60484c5c4e
23 changed files with 3134 additions and 1347 deletions
--- a/tests/integration/telemetry/test_telemetry.py
+++ b/tests/integration/telemetry/test_telemetry.py
@@ -32,8 +32,8 @@ def setup_telemetry_data(llama_stack_client, text_model_id):
        )

    for i in range(2):
-        llama_stack_client.inference.chat_completion(
-            model_id=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
+        llama_stack_client.chat.completions.create(
+            model=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
        )

    start_time = time.time()