chore: unpublish /inference/chat-completion (#3609)

# What does this PR do?

BREAKING CHANGE: removes /inference/chat-completion route and updates
relevant documentation

## Test Plan

🤷
This commit is contained in:
Matthew Farrellee 2025-09-30 14:00:42 -04:00 committed by GitHub
parent 62e302613f
commit cb33f45c11
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 1448 additions and 2137 deletions

View file

@@ -687,23 +687,17 @@
"metadata": {},
"outputs": [],
"source": [
"completion = client.inference.chat_completion(\n",
" model_id=CUSTOMIZED_MODEL,\n",
"completion = client.chat.completions.create(\n",
" model=CUSTOMIZED_MODEL,\n",
" messages=test_sample[\"messages\"],\n",
" tools=test_sample[\"tools\"],\n",
" tool_choice=\"auto\",\n",
" stream=False,\n",
" sampling_params={\n",
" \"max_tokens\": 512,\n",
" \"strategy\": {\n",
" \"type\": \"top_p\",\n",
" \"temperature\": 0.1,\n",
" \"top_p\": 0.7,\n",
" }\n",
" },\n",
" max_tokens=512,\n",
" temperature=0.1,\n",
")\n",
"\n",
"completion.completion_message.tool_calls"
"completion.choices[0].message.tool_calls"
]
},
{

View file

@@ -423,42 +423,30 @@
" violation = self.check_guardrails(user_message.get(\"content\"))\n",
" \n",
" if violation is None:\n",
" completion = client.inference.chat_completion(\n",
" model_id=self.customized_model,\n",
" completion = client.chat.completions.create(\n",
" model=self.customized_model,\n",
" messages=[user_message],\n",
" tools=tools,\n",
" tool_choice=\"auto\",\n",
" stream=False,\n",
" sampling_params={\n",
" \"max_tokens\": 1024,\n",
" \"strategy\": {\n",
" \"type\": \"top_p\",\n",
" \"top_p\": 0.7,\n",
" \"temperature\": 0.2\n",
" }\n",
" }\n",
" max_tokens=1024,\n",
" temperature=0.2,\n",
" )\n",
" return completion.completion_message\n",
" return completion.choices[0].message.content\n",
" else:\n",
" return f\"Not a safe input, the guardrails has resulted in a violation: {violation}. Tool-calling shall not happen\"\n",
" \n",
" elif self.guardrails == \"OFF\":\n",
" completion = client.inference.chat_completion(\n",
" model_id=self.customized_model,\n",
" completion = client.chat.completions.create(\n",
" model=self.customized_model,\n",
" messages=[user_message],\n",
" tools=tools,\n",
" tool_choice=\"auto\",\n",
" stream=False,\n",
" sampling_params={\n",
" \"max_tokens\": 1024,\n",
" \"strategy\": {\n",
" \"type\": \"top_p\",\n",
" \"top_p\": 0.7,\n",
" \"temperature\": 0.2\n",
" }\n",
" }\n",
" max_tokens=1024,\n",
" temperature=0.2,\n",
" )\n",
" return completion.completion_message"
" return completion.choices[0].message.content"
]
},
{