chore: unpublish /inference/chat-completion (#3609)

# What does this PR do?

BREAKING CHANGE: removes /inference/chat-completion route and updates
relevant documentation

## Test Plan

🤷
This commit is contained in:
Matthew Farrellee 2025-09-30 14:00:42 -04:00 committed by GitHub
parent 62e302613f
commit cb33f45c11
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 1448 additions and 2137 deletions

View file

@ -543,15 +543,15 @@
"source": [
"model_id = \"meta-llama/Llama-3.3-70B-Instruct\"\n",
"\n",
"response = client.inference.chat_completion(\n",
" model_id=model_id,\n",
"response = client.chat.completions.create(\n",
" model=model_id,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": \"You are a friendly assistant.\"},\n",
" {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"},\n",
" ],\n",
")\n",
"\n",
"print(response.completion_message.content)\n"
"print(response.choices[0].message.content)\n"
]
},
{
@ -625,16 +625,16 @@
" user_message = {\"role\": \"user\", \"content\": user_input}\n",
" conversation_history.append(user_message)\n",
"\n",
" response = client.inference.chat_completion(\n",
" response = client.chat.completions.create(\n",
" messages=conversation_history,\n",
" model_id=model_id,\n",
" model=model_id,\n",
" )\n",
" cprint(f\"> Response: {response.completion_message.content}\", \"cyan\")\n",
" cprint(f\"> Response: {response.choices[0].message.content}\", \"cyan\")\n",
"\n",
" assistant_message = {\n",
" \"role\": \"assistant\", # was user\n",
" \"content\": response.completion_message.content,\n",
" \"stop_reason\": response.completion_message.stop_reason,\n",
" \"content\": response.choices[0].message.content,\n",
" \"stop_reason\": response.choices[0].finish_reason,\n",
" }\n",
" conversation_history.append(assistant_message)\n",
"\n",
@ -691,16 +691,16 @@
" user_message = {\"role\": \"user\", \"content\": user_input}\n",
" conversation_history.append(user_message)\n",
"\n",
" response = client.inference.chat_completion(\n",
" response = client.chat.completions.create(\n",
" messages=conversation_history,\n",
" model_id=model_id,\n",
" model=model_id,\n",
" )\n",
" cprint(f\"> Response: {response.completion_message.content}\", \"cyan\")\n",
" cprint(f\"> Response: {response.choices[0].message.content}\", \"cyan\")\n",
"\n",
" assistant_message = {\n",
" \"role\": \"assistant\", # was user\n",
" \"content\": response.completion_message.content,\n",
" \"stop_reason\": response.completion_message.stop_reason,\n",
" \"content\": response.choices[0].message.content,\n",
" \"stop_reason\": response.choices[0].finish_reason,\n",
" }\n",
" conversation_history.append(assistant_message)\n",
"\n",
@ -763,9 +763,9 @@
"message = {\"role\": \"user\", \"content\": \"Write me a sonnet about llama\"}\n",
"print(f'User> {message[\"content\"]}')\n",
"\n",
"response = client.inference.chat_completion(\n",
"response = client.chat.completions.create(\n",
" messages=[message],\n",
" model_id=model_id,\n",
" model=model_id,\n",
" stream=True, # <-----------\n",
")\n",
"\n",
@ -2917,7 +2917,7 @@
}
],
"source": [
"response = client.inference.chat_completion(\n",
"response = client.chat.completions.create(\n",
" messages=[\n",
" {\n",
" \"role\": \"user\",\n",
@ -2937,11 +2937,11 @@
" ]\n",
" }\n",
" ],\n",
" model_id=vision_model_id,\n",
" model=vision_model_id,\n",
" stream=False,\n",
")\n",
"\n",
"print(response.completion_message.content)"
"print(response.choices[0].message.content)"
]
},
{