chore: unpublish /inference/chat-completion (#3609)

# What does this PR do?

BREAKING CHANGE: removes /inference/chat-completion route and updates
relevant documentation

## Test Plan

🤷
This commit is contained in:
Matthew Farrellee 2025-09-30 14:00:42 -04:00 committed by GitHub
parent 62e302613f
commit cb33f45c11
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 1448 additions and 2137 deletions

View file

@@ -3615,7 +3615,7 @@
"from rich.pretty import pprint\n",
"\n",
"response = client.models.register(\n",
-" model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n",
+" model=\"meta-llama/Llama-3.2-3B-Instruct\",\n",
" provider_id=\"ollama\",\n",
" provider_model_id=\"llama3.2:3b\",\n",
" # base model id\n",
@@ -5762,7 +5762,7 @@
"source": [
"response = client.models.register(\n",
" # the model id here needs to be the finetuned checkpoint identifier\n",
-" model_id=\"meta-llama/Llama-3.2-3B-Instruct-sft-0\",\n",
+" model=\"meta-llama/Llama-3.2-3B-Instruct-sft-0\",\n",
" provider_id=\"ollama\",\n",
" provider_model_id=\"llama_3_2_finetuned:latest\",\n",
" # base model id\n",
@@ -5816,14 +5816,14 @@
}
],
"source": [
-"response = client.inference.chat_completion(\n",
-" model_id=\"meta-llama/Llama-3.2-3B-Instruct-sft-0\",\n",
+"response = client.chat.completions.create(\n",
+" model=\"meta-llama/Llama-3.2-3B-Instruct-sft-0\",\n",
" messages=[\n",
" {\"role\": \"user\", \"content\": \"What is the primary purpose of a W-2 form in relation to income tax?\"}\n",
" ],\n",
")\n",
"\n",
-"print(response.completion_message.content)"
+"print(response.choices[0].message.content)"
]
},
{