Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-04 04:04:14 +00:00)
fix: remove inference.completion from docs (#3589)
# What does this PR do?

Now that /v1/inference/completion has been removed, no docs should refer to it. This cleans up the remaining references.

## Test Plan

ci

Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Commit: e9eb004bf8
Parent: 498be131a1

6 changed files with 26 additions and 64 deletions
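For context, the docs move from the removed /v1/inference/completion endpoint to the OpenAI-compatible completions API. Below is a minimal before/after sketch of the call being migrated, assuming the client setup from the notebooks' earlier cells; the base URL and model directory are hypothetical placeholders, while the call arguments come straight from the diff:

```python
from llama_stack_client import LlamaStackClient

# Hypothetical setup; the notebooks define these in earlier cells.
client = LlamaStackClient(base_url="http://localhost:8321")
CUSTOMIZED_MODEL_DIR = "nvidia/customized-model"  # placeholder model identifier

# Before (removed): the legacy /v1/inference/completion API.
# response = client.inference.completion(
#     content="Complete the sentence using one word: Roses are red, violets are ",
#     stream=False,
#     model_id=CUSTOMIZED_MODEL_DIR,
#     sampling_params={
#         "strategy": {"type": "top_p", "temperature": 0.7, "top_p": 0.9},
#         "max_tokens": 20,
#     },
# )
# print(f"Inference response: {response.content}")

# After: the OpenAI-compatible completions API.
response = client.completions.create(
    prompt="Complete the sentence using one word: Roses are red, violets are ",
    stream=False,
    model=CUSTOMIZED_MODEL_DIR,
    temperature=0.7,
    top_p=0.9,
    max_tokens=20,
)
print(f"Inference response: {response.choices[0].text}")
```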
```diff
@@ -706,20 +706,15 @@
     "    provider_id=\"nvidia\",\n",
     ")\n",
     "\n",
-    "response = client.inference.completion(\n",
-    "    content=\"Complete the sentence using one word: Roses are red, violets are \",\n",
+    "response = client.completions.create(\n",
+    "    prompt=\"Complete the sentence using one word: Roses are red, violets are \",\n",
     "    stream=False,\n",
-    "    model_id=CUSTOMIZED_MODEL_DIR,\n",
-    "    sampling_params={\n",
-    "        \"strategy\": {\n",
-    "            \"type\": \"top_p\",\n",
-    "            \"temperature\": 0.7,\n",
-    "            \"top_p\": 0.9\n",
-    "        },\n",
-    "        \"max_tokens\": 20,\n",
-    "    },\n",
+    "    model=CUSTOMIZED_MODEL_DIR,\n",
+    "    temperature=0.7,\n",
+    "    top_p=0.9,\n",
+    "    max_tokens=20,\n",
     ")\n",
-    "print(f\"Inference response: {response.content}\")"
+    "print(f\"Inference response: {response.choices[0].text}\")"
     ]
    },
    {
```
|
@ -1233,20 +1228,15 @@
|
|||
" provider_id=\"nvidia\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"response = client.inference.completion(\n",
|
||||
" content=\"Complete the sentence using one word: Roses are red, violets are \",\n",
|
||||
"response = client.completions.create(\n",
|
||||
" prompt=\"Complete the sentence using one word: Roses are red, violets are \",\n",
|
||||
" stream=False,\n",
|
||||
" model_id=customized_chat_model_dir,\n",
|
||||
" sampling_params={\n",
|
||||
" \"strategy\": {\n",
|
||||
" \"type\": \"top_p\",\n",
|
||||
" \"temperature\": 0.7,\n",
|
||||
" \"top_p\": 0.9\n",
|
||||
" },\n",
|
||||
" \"max_tokens\": 20,\n",
|
||||
" },\n",
|
||||
" model=customized_chat_model_dir,\n",
|
||||
" temperature=0.7,\n",
|
||||
" top_p=0.9,\n",
|
||||
" max_tokens=20,\n",
|
||||
")\n",
|
||||
"print(f\"Inference response: {response.content}\")"
|
||||
"print(f\"Inference response: {response.choices[0].text}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
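Both hunks apply the same migration and differ only in the model variable (CUSTOMIZED_MODEL_DIR vs. customized_chat_model_dir). Note that the response shape changes too: the OpenAI-style completions response exposes the generated text at response.choices[0].text rather than response.content.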