chore: unpublish /inference/chat-completion (#3609)

# What does this PR do?

BREAKING CHANGE: removes the /inference/chat-completion route and updates the
affected documentation and notebooks to use the OpenAI-compatible chat
completions API.
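
For downstream users the change is mechanical. A minimal before/after sketch, assuming `client` is a Llama Stack client instance and that `BASE_MODEL` and `sample_prompt` are defined as in the updated notebooks:

```python
# Before: the removed /inference/chat-completion route
response = client.inference.chat_completion(
    messages=[{"role": "user", "content": sample_prompt}],
    model_id=BASE_MODEL,
    sampling_params={
        "max_tokens": 20,
        "strategy": {"type": "top_p", "temperature": 0.7, "top_p": 0.9},
    },
)
print(response.completion_message.content)

# After: the OpenAI-compatible chat completions API
response = client.chat.completions.create(
    messages=[{"role": "user", "content": sample_prompt}],
    model=BASE_MODEL,  # model_id -> model
    max_tokens=20,     # sampling_params fields become top-level kwargs
    temperature=0.7,
)
print(response.choices[0].message.content)  # completion_message -> choices[0].message
```

Note that the nested sampling `strategy` has no direct equivalent: the notebook updates below carry over `temperature` and drop `top_p`.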

## Test Plan

🤷
Author: Matthew Farrellee
Date: 2025-09-30 14:00:42 -04:00 (committed by GitHub)
Commit: cb33f45c11 (parent: 62e302613f)
23 changed files with 1448 additions and 2137 deletions

@@ -419,21 +419,15 @@
 "outputs": [],
 "source": [
 "# Test inference\n",
-"response = client.inference.chat_completion(\n",
+"response = client.chat.completions.create(\n",
 " messages=[\n",
 " {\"role\": \"user\", \"content\": sample_prompt}\n",
 " ],\n",
-" model_id=BASE_MODEL,\n",
-" sampling_params={\n",
-" \"max_tokens\": 20,\n",
-" \"strategy\": {\n",
-" \"type\": \"top_p\",\n",
-" \"temperature\": 0.7,\n",
-" \"top_p\": 0.9\n",
-" }\n",
-" }\n",
+" model=BASE_MODEL,\n",
+" max_tokens=20,\n",
+" temperature=0.7,\n",
 ")\n",
-"print(f\"Inference response: {response.completion_message.content}\")"
+"print(f\"Inference response: {response.choices[0].message.content}\")"
 ]
 },
 {
@@ -945,20 +939,14 @@
 "outputs": [],
 "source": [
 "# Test inference\n",
-"response = client.inference.chat_completion(\n",
+"response = client.chat.completions.create(\n",
 " messages=sample_messages,\n",
-" model_id=BASE_MODEL,\n",
-" sampling_params={\n",
-" \"max_tokens\": 20,\n",
-" \"strategy\": {\n",
-" \"type\": \"top_p\",\n",
-" \"temperature\": 0.7,\n",
-" \"top_p\": 0.9\n",
-" }\n",
-" }\n",
+" model=BASE_MODEL,\n",
+" max_tokens=20,\n",
+" temperature=0.7,\n",
 ")\n",
-"assert response.completion_message.content is not None\n",
-"print(f\"Inference response: {response.completion_message.content}\")"
+"assert response.choices[0].message.content is not None\n",
+"print(f\"Inference response: {response.choices[0].message.content}\")"
 ]
 },
 {
@@ -1438,15 +1426,13 @@
 "outputs": [],
 "source": [
 "# Check inference without guardrails\n",
-"response = client.inference.chat_completion(\n",
+"response = client.chat.completions.create(\n",
 " messages=[message],\n",
-" model_id=BASE_MODEL,\n",
-" sampling_params={\n",
-" \"max_tokens\": 150,\n",
-" }\n",
+" model=BASE_MODEL,\n",
+" max_tokens=150,\n",
 ")\n",
-"assert response.completion_message.content is not None\n",
-"print(f\"Inference response: {response.completion_message.content}\")"
+"assert response.choices[0].message.content is not None\n",
+"print(f\"Inference response: {response.choices[0].message.content}\")"
 ]
 },
 {