Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-04 04:04:14 +00:00)
fix: remove inference.completion from docs (#3589)
# What does this PR do?

Now that /v1/inference/completion has been removed, no docs should refer to it. This cleans up the remaining references.

## Test Plan

ci

Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Commit: e9eb004bf8
Parent: 498be131a1

6 changed files with 26 additions and 64 deletions
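For context, the docs move from the removed /v1/inference/completion endpoint to the OpenAI-compatible completions API. Below is a minimal before/after sketch of the call being migrated, assuming the client setup from the notebooks' earlier cells; the base URL and model directory are hypothetical placeholders, while the call arguments come straight from the diff:

```python
from llama_stack_client import LlamaStackClient

# Hypothetical setup; the notebooks define these in earlier cells.
client = LlamaStackClient(base_url="http://localhost:8321")
CUSTOMIZED_MODEL_DIR = "nvidia/customized-model"  # placeholder model identifier

# Before (removed): the legacy /v1/inference/completion API.
# response = client.inference.completion(
#     content="Complete the sentence using one word: Roses are red, violets are ",
#     stream=False,
#     model_id=CUSTOMIZED_MODEL_DIR,
#     sampling_params={
#         "strategy": {"type": "top_p", "temperature": 0.7, "top_p": 0.9},
#         "max_tokens": 20,
#     },
# )
# print(f"Inference response: {response.content}")

# After: the OpenAI-compatible completions API.
response = client.completions.create(
    prompt="Complete the sentence using one word: Roses are red, violets are ",
    stream=False,
    model=CUSTOMIZED_MODEL_DIR,
    temperature=0.7,
    top_p=0.9,
    max_tokens=20,
)
print(f"Inference response: {response.choices[0].text}")
```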
```diff
@@ -706,20 +706,15 @@
     "    provider_id=\"nvidia\",\n",
     ")\n",
     "\n",
-    "response = client.inference.completion(\n",
-    "    content=\"Complete the sentence using one word: Roses are red, violets are \",\n",
+    "response = client.completions.create(\n",
+    "    prompt=\"Complete the sentence using one word: Roses are red, violets are \",\n",
     "    stream=False,\n",
-    "    model_id=CUSTOMIZED_MODEL_DIR,\n",
-    "    sampling_params={\n",
-    "        \"strategy\": {\n",
-    "            \"type\": \"top_p\",\n",
-    "            \"temperature\": 0.7,\n",
-    "            \"top_p\": 0.9\n",
-    "        },\n",
-    "        \"max_tokens\": 20,\n",
-    "    },\n",
+    "    model=CUSTOMIZED_MODEL_DIR,\n",
+    "    temperature=0.7,\n",
+    "    top_p=0.9,\n",
+    "    max_tokens=20,\n",
     ")\n",
-    "print(f\"Inference response: {response.content}\")"
+    "print(f\"Inference response: {response.choices[0].text}\")"
     ]
    },
    {
```
|
@ -1233,20 +1228,15 @@
|
|||
" provider_id=\"nvidia\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"response = client.inference.completion(\n",
|
||||
" content=\"Complete the sentence using one word: Roses are red, violets are \",\n",
|
||||
"response = client.completions.create(\n",
|
||||
" prompt=\"Complete the sentence using one word: Roses are red, violets are \",\n",
|
||||
" stream=False,\n",
|
||||
" model_id=customized_chat_model_dir,\n",
|
||||
" sampling_params={\n",
|
||||
" \"strategy\": {\n",
|
||||
" \"type\": \"top_p\",\n",
|
||||
" \"temperature\": 0.7,\n",
|
||||
" \"top_p\": 0.9\n",
|
||||
" },\n",
|
||||
" \"max_tokens\": 20,\n",
|
||||
" },\n",
|
||||
" model=customized_chat_model_dir,\n",
|
||||
" temperature=0.7,\n",
|
||||
" top_p=0.9,\n",
|
||||
" max_tokens=20,\n",
|
||||
")\n",
|
||||
"print(f\"Inference response: {response.content}\")"
|
||||
"print(f\"Inference response: {response.choices[0].text}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
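Both hunks apply the same migration and differ only in the model variable (CUSTOMIZED_MODEL_DIR vs. customized_chat_model_dir). Note that the response shape changes too: the OpenAI-style completions response exposes the generated text at response.choices[0].text rather than response.content.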