mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
chore: unpublish /inference/chat-completion (#3609)
# What does this PR do?
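BREAKING CHANGE: removes the /inference/chat-completion route and updates the
relevant documentation and notebook examples to call `client.chat.completions.create` instead of `client.inference.chat_completion`.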
## Test Plan
🤷
This commit is contained in:
parent
62e302613f
commit
cb33f45c11
23 changed files with 1448 additions and 2137 deletions
|
@ -419,21 +419,15 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"# Test inference\n",
|
||||
"response = client.inference.chat_completion(\n",
|
||||
"response = client.chat.completions.create(\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"user\", \"content\": sample_prompt}\n",
|
||||
" ],\n",
|
||||
" model_id=BASE_MODEL,\n",
|
||||
" sampling_params={\n",
|
||||
" \"max_tokens\": 20,\n",
|
||||
" \"strategy\": {\n",
|
||||
" \"type\": \"top_p\",\n",
|
||||
" \"temperature\": 0.7,\n",
|
||||
" \"top_p\": 0.9\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" model=BASE_MODEL,\n",
|
||||
" max_tokens=20,\n",
|
||||
" temperature=0.7,\n",
|
||||
")\n",
|
||||
"print(f\"Inference response: {response.completion_message.content}\")"
|
||||
"print(f\"Inference response: {response.choices[0].message.content}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -945,20 +939,14 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"# Test inference\n",
|
||||
"response = client.inference.chat_completion(\n",
|
||||
"response = client.chat.completions.create(\n",
|
||||
" messages=sample_messages,\n",
|
||||
" model_id=BASE_MODEL,\n",
|
||||
" sampling_params={\n",
|
||||
" \"max_tokens\": 20,\n",
|
||||
" \"strategy\": {\n",
|
||||
" \"type\": \"top_p\",\n",
|
||||
" \"temperature\": 0.7,\n",
|
||||
" \"top_p\": 0.9\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" model=BASE_MODEL,\n",
|
||||
" max_tokens=20,\n",
|
||||
" temperature=0.7,\n",
|
||||
")\n",
|
||||
"assert response.completion_message.content is not None\n",
|
||||
"print(f\"Inference response: {response.completion_message.content}\")"
|
||||
"assert response.choices[0].message.content is not None\n",
|
||||
"print(f\"Inference response: {response.choices[0].message.content}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1438,15 +1426,13 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"# Check inference without guardrails\n",
|
||||
"response = client.inference.chat_completion(\n",
|
||||
"response = client.chat.completions.create(\n",
|
||||
" messages=[message],\n",
|
||||
" model_id=BASE_MODEL,\n",
|
||||
" sampling_params={\n",
|
||||
" \"max_tokens\": 150,\n",
|
||||
" }\n",
|
||||
" model=BASE_MODEL,\n",
|
||||
" max_tokens=150,\n",
|
||||
")\n",
|
||||
"assert response.completion_message.content is not None\n",
|
||||
"print(f\"Inference response: {response.completion_message.content}\")"
|
||||
"assert response.choices[0].message.content is not None\n",
|
||||
"print(f\"Inference response: {response.choices[0].message.content}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue