mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 12:07:34 +00:00
chore: unpublish /inference/chat-completion (#3609)
# What does this PR do?
BREAKING CHANGE: removes /inference/chat-completion route and updates
relevant documentation
## Test Plan
🤷
This commit is contained in:
parent
62e302613f
commit
cb33f45c11
23 changed files with 1448 additions and 2137 deletions
|
@ -44,7 +44,7 @@ The playground provides interactive pages for users to explore Llama Stack API c
|
|||
|
||||
**Simple Chat Interface**
|
||||
- Chat directly with Llama models through an intuitive interface
|
||||
- Uses the `/inference/chat-completion` streaming API under the hood
|
||||
- Uses the `/chat/completions` streaming API under the hood
|
||||
- Real-time message streaming for responsive interactions
|
||||
- Perfect for testing model capabilities and prompt engineering
|
||||
|
||||
|
|
|
@ -313,7 +313,7 @@ client = LlamaStackClient(
|
|||
)
|
||||
|
||||
# All API calls will be automatically traced
|
||||
response = client.inference.chat_completion(
|
||||
response = client.chat.completions.create(
|
||||
model="meta-llama/Llama-3.2-3B-Instruct",
|
||||
messages=[{"role": "user", "content": "Hello!"}]
|
||||
)
|
||||
|
@ -327,7 +327,7 @@ with tracer.start_as_current_span("custom_operation") as span:
|
|||
span.set_attribute("user_id", "user123")
|
||||
span.set_attribute("operation_type", "chat_completion")
|
||||
|
||||
response = client.inference.chat_completion(
|
||||
response = client.chat.completions.create(
|
||||
model="meta-llama/Llama-3.2-3B-Instruct",
|
||||
messages=[{"role": "user", "content": "Hello!"}]
|
||||
)
|
||||
|
|
|
@ -216,7 +216,6 @@ from llama_stack_client.types import (
|
|||
|
||||
Methods:
|
||||
|
||||
- <code title="post /v1/inference/chat-completion">client.inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/inference.py">chat_completion</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_chat_completion_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_chat_completion_response.py">InferenceChatCompletionResponse</a></code>
|
||||
- <code title="post /v1/inference/embeddings">client.inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/inference.py">embeddings</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_embeddings_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/embeddings_response.py">EmbeddingsResponse</a></code>
|
||||
|
||||
## VectorIo
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue