chore: unpublish /inference/chat-completion (#3609)

# What does this PR do?

BREAKING CHANGE: removes /inference/chat-completion route and updates relevant documentation

## Test Plan

🤷
2025-12-03 09:53:45 +00:00 · 2025-09-30 14:00:42 -04:00 · 2025-09-30 14:00:42 -04:00 · cb33f45c11
commit cb33f45c11
parent 62e302613f
23 changed files with 1448 additions and 2137 deletions
--- a/docs/getting_started.ipynb
+++ b/docs/getting_started.ipynb
@@ -543,15 +543,15 @@
      "source": [
        "model_id = \"meta-llama/Llama-3.3-70B-Instruct\"\n",
        "\n",
-        "response = client.inference.chat_completion(\n",
-        "    model_id=model_id,\n",
+        "response = client.chat.completions.create(\n",
+        "    model=model_id,\n",
        "    messages=[\n",
        "        {\"role\": \"system\", \"content\": \"You are a friendly assistant.\"},\n",
        "        {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"},\n",
        "    ],\n",
        ")\n",
        "\n",
-        "print(response.completion_message.content)\n"
+        "print(response.choices[0].message.content)\n"
      ]
    },
    {
@@ -625,16 +625,16 @@
        "        user_message = {\"role\": \"user\", \"content\": user_input}\n",
        "        conversation_history.append(user_message)\n",
        "\n",
-        "        response = client.inference.chat_completion(\n",
+        "        response = client.chat.completions.create(\n",
        "            messages=conversation_history,\n",
-        "            model_id=model_id,\n",
+        "            model=model_id,\n",
        "        )\n",
-        "        cprint(f\"> Response: {response.completion_message.content}\", \"cyan\")\n",
+        "        cprint(f\"> Response: {response.choices[0].message.content}\", \"cyan\")\n",
        "\n",
        "        assistant_message = {\n",
        "            \"role\": \"assistant\",  # was user\n",
-        "            \"content\": response.completion_message.content,\n",
-        "            \"stop_reason\": response.completion_message.stop_reason,\n",
+        "            \"content\": response.choices[0].message.content,\n",
+        "            \"stop_reason\": response.choices[0].finish_reason,\n",
        "        }\n",
        "        conversation_history.append(assistant_message)\n",
        "\n",
@@ -691,16 +691,16 @@
        "        user_message = {\"role\": \"user\", \"content\": user_input}\n",
        "        conversation_history.append(user_message)\n",
        "\n",
-        "        response = client.inference.chat_completion(\n",
+        "        response = client.chat.completions.create(\n",
        "            messages=conversation_history,\n",
-        "            model_id=model_id,\n",
+        "            model=model_id,\n",
        "        )\n",
-        "        cprint(f\"> Response: {response.completion_message.content}\", \"cyan\")\n",
+        "        cprint(f\"> Response: {response.choices[0].message.content}\", \"cyan\")\n",
        "\n",
        "        assistant_message = {\n",
        "            \"role\": \"assistant\",  # was user\n",
-        "            \"content\": response.completion_message.content,\n",
-        "            \"stop_reason\": response.completion_message.stop_reason,\n",
+        "            \"content\": response.choices[0].message.content,\n",
+        "            \"stop_reason\": response.choices[0].finish_reason,\n",
        "        }\n",
        "        conversation_history.append(assistant_message)\n",
        "\n",
@@ -763,9 +763,9 @@
        "message = {\"role\": \"user\", \"content\": \"Write me a sonnet about llama\"}\n",
        "print(f'User> {message[\"content\"]}')\n",
        "\n",
-        "response = client.inference.chat_completion(\n",
+        "response = client.chat.completions.create(\n",
        "    messages=[message],\n",
-        "    model_id=model_id,\n",
+        "    model=model_id,\n",
        "    stream=True,  # <-----------\n",
        ")\n",
        "\n",
@@ -2917,7 +2917,7 @@
        }
      ],
      "source": [
-        "response = client.inference.chat_completion(\n",
+        "response = client.chat.completions.create(\n",
        "    messages=[\n",
        "        {\n",
        "            \"role\": \"user\",\n",
@@ -2937,11 +2937,11 @@
        "            ]\n",
        "        }\n",
        "    ],\n",
-        "    model_id=vision_model_id,\n",
+        "    model=vision_model_id,\n",
        "    stream=False,\n",
        ")\n",
        "\n",
-        "print(response.completion_message.content)"
+        "print(response.choices[0].message.content)"
      ]
    },
    {