mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 12:07:34 +00:00
chore(apis): unpublish deprecated /v1/inference apis (#3297)
# What does this PR do?

Unpublish (make unavailable to users) the following APIs:

- `/v1/inference/completion`, replaced by `/v1/openai/v1/completions`
- `/v1/inference/chat-completion`, replaced by `/v1/openai/v1/chat/completions`
- `/v1/inference/embeddings`, replaced by `/v1/openai/v1/embeddings`
- `/v1/inference/batch-completion`, replaced by `/v1/openai/v1/batches`
- `/v1/inference/batch-chat-completion`, replaced by `/v1/openai/v1/batches`

Note: the implementations are still available for internal use, e.g. agents uses chat-completion.
This commit is contained in:
parent
60484c5c4e
commit
53b15725b6
23 changed files with 3134 additions and 1347 deletions
|
@ -32,8 +32,8 @@ def setup_telemetry_data(llama_stack_client, text_model_id):
|
|||
)
|
||||
|
||||
for i in range(2):
|
||||
llama_stack_client.inference.chat_completion(
|
||||
model_id=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
|
||||
llama_stack_client.chat.completions.create(
|
||||
model=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
|
||||
)
|
||||
|
||||
start_time = time.time()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue