mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-03 19:57:35 +00:00
fix: remove inference.completion from docs (#3589)
# What does this PR do? Now that /v1/inference/completion has been removed, no docs should refer to it. This cleans up the remaining references. ## Test Plan ci Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
This commit is contained in:
parent
498be131a1
commit
e9eb004bf8
6 changed files with 26 additions and 64 deletions
|
@ -824,16 +824,10 @@
|
|||
"\n",
|
||||
"\n",
|
||||
"user_input = \"Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003. Extract this information into JSON for me. \"\n",
|
||||
"response = client.inference.completion(\n",
|
||||
" model_id=\"meta-llama/Llama-3.1-8B-Instruct\",\n",
|
||||
" content=user_input,\n",
|
||||
" stream=False,\n",
|
||||
" sampling_params={\n",
|
||||
" \"strategy\": {\n",
|
||||
" \"type\": \"greedy\",\n",
|
||||
" },\n",
|
||||
" \"max_tokens\": 50,\n",
|
||||
" },\n",
|
||||
"response = client.chat.completions.create(\n",
|
||||
" model=\"meta-llama/Llama-3.1-8B-Instruct\",\n",
|
||||
" messages=[{\"role\": \"user\", \"content\": user_input}],\n",
|
||||
" max_tokens=50,\n",
|
||||
" response_format={\n",
|
||||
" \"type\": \"json_schema\",\n",
|
||||
" \"json_schema\": Output.model_json_schema(),\n",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue