Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-16 18:32:40 +00:00
update nb-1
This commit is contained in: parent 06b699f5ec, commit 0027c5b614
2 changed files with 22 additions and 11 deletions
@@ -85,7 +85,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "77c29dba",
    "metadata": {},
    "outputs": [
@@ -104,7 +104,8 @@
  " {\"role\": \"system\", \"content\": \"You are a friendly assistant.\"},\n",
  " {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"}\n",
  " ],\n",
- " model='Llama3.2-11B-Vision-Instruct',\n",
+ " model_id='Llama3.2-11B-Vision-Instruct',\n",
+ " stream=True\n",
  ")\n",
  "\n",
  "print(response.completion_message.content)"
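For context, a minimal sketch of the inference call after this change, assuming a Llama Stack server reachable at http://localhost:5000 and the `llama_stack_client` package; the base URL and port are illustrative assumptions, not part of this commit.

```python
# Minimal sketch of the renamed keyword argument: `model` -> `model_id`.
# The server URL/port below are assumptions for illustration only.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

response = client.inference.chat_completion(
    messages=[
        {"role": "system", "content": "You are a friendly assistant."},
        {"role": "user", "content": "Write a two-sentence poem about llama."},
    ],
    model_id="Llama3.2-11B-Vision-Instruct",
    stream=False,  # non-streaming, so completion_message is available directly
)
print(response.completion_message.content)
```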
@@ -124,7 +125,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "5c6812da",
    "metadata": {},
    "outputs": [
@@ -143,7 +144,8 @@
  " {\"role\": \"system\", \"content\": \"You are shakespeare.\"},\n",
  " {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"}\n",
  " ],\n",
- " model='Llama3.2-11B-Vision-Instruct',\n",
+ " model_id='Llama3.2-11B-Vision-Instruct',\n",
+ " stream=True\n",
  ")\n",
  "\n",
  "print(response.completion_message.content)"
@@ -226,7 +228,8 @@
  " message = {\"role\": \"user\", \"content\": user_input}\n",
  " response = client.inference.chat_completion(\n",
  " messages=[message],\n",
- " model='Llama3.2-11B-Vision-Instruct',\n",
+ " model_id='Llama3.2-11B-Vision-Instruct',\n",
+ " stream=True\n",
  " )\n",
  " cprint(f'> Response: {response.completion_message.content}', 'cyan')\n",
  "\n",
@@ -274,7 +277,8 @@
  "\n",
  " response = client.inference.chat_completion(\n",
  " messages=conversation_history,\n",
- " model='Llama3.2-11B-Vision-Instruct',\n",
+ " model_id='Llama3.2-11B-Vision-Instruct',\n",
+ " stream=True\n",
  " )\n",
  " cprint(f'> Response: {response.completion_message.content}', 'cyan')\n",
  "\n",
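The hunk above belongs to a multi-turn chat cell that passes a growing `conversation_history` to each call. A rough sketch of that pattern follows, assuming the same client as in the earlier sketch; the assistant-message shape appended after each turn is an assumption and is not shown in this hunk.

```python
# Hedged sketch of a multi-turn loop built on conversation_history.
# The assistant-message dict appended after each turn is an assumption;
# depending on server-side validation, extra fields may be required.
from llama_stack_client import LlamaStackClient
from termcolor import cprint

client = LlamaStackClient(base_url="http://localhost:5000")  # illustrative URL
conversation_history = []

def chat_turn(user_input: str) -> None:
    conversation_history.append({"role": "user", "content": user_input})
    response = client.inference.chat_completion(
        messages=conversation_history,
        model_id="Llama3.2-11B-Vision-Instruct",
        stream=False,  # non-streaming keeps completion_message directly accessible
    )
    cprint(f"> Response: {response.completion_message.content}", "cyan")
    conversation_history.append(
        {"role": "assistant", "content": response.completion_message.content}
    )

chat_turn("Write a two-sentence poem about llama.")
chat_turn("Now make it rhyme.")
```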
@@ -299,7 +303,7 @@
  "source": [
  "## Streaming Responses\n",
  "\n",
- "Llama Stack offers a `stream` parameter in the `chat_completion` function, which allows partial responses to be returned progressively as they are generated. This can enhance user experience by providing immediate feedback without waiting for the entire response to be processed."
+ "Llama Stack offers a `stream` parameter in the `chat_completion` function, which allows partial responses to be returned progressively as they are generated. This can enhance user experience by providing immediate feedback without waiting for the entire response to be processed. You can change this `bool` value to `True` or `False` to allow streaming"
  ]
 },
 {
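A small sketch of toggling that flag, using the same assumed client as above. The exact schema of the streamed chunks depends on the client version, so the loop below just prints the raw chunks.

```python
# Hedged sketch of the `stream` toggle described in the markdown above.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")  # illustrative URL
stream = True
message = {"role": "user", "content": "Write a two-sentence poem about llama."}

response = client.inference.chat_completion(
    messages=[message],
    model_id="Llama3.2-11B-Vision-Instruct",
    stream=stream,
)

if not stream:
    # A single response object carrying the full completion.
    print(response.completion_message.content)
else:
    # With stream=True, partial chunks arrive as they are generated;
    # exact chunk fields vary by client version, so print them raw.
    for chunk in response:
        print(chunk)
```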
@@ -322,8 +326,8 @@
  "\n",
  " response = client.inference.chat_completion(\n",
  " messages=[message],\n",
- " model='Llama3.2-11B-Vision-Instruct',\n",
- " stream=stream,\n",
+ " model_id='Llama3.2-11B-Vision-Instruct',\n",
+ " stream=True\n",
  " )\n",
  "\n",
  " if not stream:\n",
@@ -341,7 +345,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -355,7 +359,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.15"
+   "version": "3.12.2"
   }
  },
  "nbformat": 4,
@@ -102,6 +102,13 @@ If you're looking for more specific topics like tool calling or agent setup, we
    llama stack build --template ollama --image-type conda
    ```
+
+   After this step, you will see the console output:
+   ```
+   Build Successful! Next steps:
+   1. Set the environment variables: LLAMASTACK_PORT, OLLAMA_URL, INFERENCE_MODEL, SAFETY_MODEL
+   2. `llama stack run /Users/username/.llama/distributions/llamastack-ollama/ollama-run.yaml`
+   ```

 2. **Edit Configuration**:
    - Modify the `ollama-run.yaml` file located at `/Users/yourusername/.llama/distributions/llamastack-ollama/ollama-run.yaml`:
    - Change the `chromadb` port to `8000`.
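Once the `llama stack run` command from that console output has a server up, a quick sanity check from Python can confirm the stack is serving; the port and the idea of listing registered models are assumptions for illustration, not part of this commit.

```python
# Hedged sanity check that the freshly built ollama distribution is serving.
# The base_url/port below are illustrative; use whatever LLAMASTACK_PORT you set.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

# List whatever models the running stack has registered.
for model in client.models.list():
    print(model)
```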