mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-08 13:00:52 +00:00
chore: unpublish /inference/chat-completion (#3609)
# What does this PR do?
BREAKING CHANGE: removes /inference/chat-completion route and updates
relevant documentation
## Test Plan
🤷
This commit is contained in:
parent
62e302613f
commit
cb33f45c11
23 changed files with 1448 additions and 2137 deletions
|
@ -44,7 +44,7 @@ The playground provides interactive pages for users to explore Llama Stack API c
|
|||
|
||||
**Simple Chat Interface**
|
||||
- Chat directly with Llama models through an intuitive interface
|
||||
- Uses the `/inference/chat-completion` streaming API under the hood
|
||||
- Uses the `/chat/completions` streaming API under the hood
|
||||
- Real-time message streaming for responsive interactions
|
||||
- Perfect for testing model capabilities and prompt engineering
|
||||
|
||||
|
|
|
@ -313,7 +313,7 @@ client = LlamaStackClient(
|
|||
)
|
||||
|
||||
# All API calls will be automatically traced
|
||||
response = client.inference.chat_completion(
|
||||
response = client.chat.completions.create(
|
||||
model="meta-llama/Llama-3.2-3B-Instruct",
|
||||
messages=[{"role": "user", "content": "Hello!"}]
|
||||
)
|
||||
|
@ -327,7 +327,7 @@ with tracer.start_as_current_span("custom_operation") as span:
|
|||
span.set_attribute("user_id", "user123")
|
||||
span.set_attribute("operation_type", "chat_completion")
|
||||
|
||||
response = client.inference.chat_completion(
|
||||
response = client.chat.completions.create(
|
||||
model="meta-llama/Llama-3.2-3B-Instruct",
|
||||
messages=[{"role": "user", "content": "Hello!"}]
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue