{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install litellm # version 0.1.724 or higher "
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Call Ollama - llama2 with Streaming"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<generator object get_ollama_response_stream at 0x109096c10>\n",
"{'role': 'assistant', 'content': ' I'}\n",
"{'role': 'assistant', 'content': \"'\"}\n",
"{'role': 'assistant', 'content': 'm'}\n",
"{'role': 'assistant', 'content': ' L'}\n",
"{'role': 'assistant', 'content': 'La'}\n",
"{'role': 'assistant', 'content': 'MA'}\n",
"{'role': 'assistant', 'content': ','}\n",
"{'role': 'assistant', 'content': ' an'}\n",
"{'role': 'assistant', 'content': ' A'}\n",
"{'role': 'assistant', 'content': 'I'}\n",
"{'role': 'assistant', 'content': ' assistant'}\n",
"{'role': 'assistant', 'content': ' developed'}\n",
"{'role': 'assistant', 'content': ' by'}\n",
"{'role': 'assistant', 'content': ' Meta'}\n",
"{'role': 'assistant', 'content': ' A'}\n",
"{'role': 'assistant', 'content': 'I'}\n",
"{'role': 'assistant', 'content': ' that'}\n",
"{'role': 'assistant', 'content': ' can'}\n",
"{'role': 'assistant', 'content': ' understand'}\n",
"{'role': 'assistant', 'content': ' and'}\n",
"{'role': 'assistant', 'content': ' respond'}\n",
"{'role': 'assistant', 'content': ' to'}\n",
"{'role': 'assistant', 'content': ' human'}\n",
"{'role': 'assistant', 'content': ' input'}\n",
"{'role': 'assistant', 'content': ' in'}\n",
"{'role': 'assistant', 'content': ' a'}\n",
"{'role': 'assistant', 'content': ' convers'}\n",
"{'role': 'assistant', 'content': 'ational'}\n",
"{'role': 'assistant', 'content': ' manner'}\n",
"{'role': 'assistant', 'content': '.'}\n"
]
}
],
"source": [
"from litellm import completion\n",
"\n",
"response = completion(\n",
" model=\"ollama/llama2\", \n",
" messages=[{ \"content\": \"respond in 20 words. who are you?\",\"role\": \"user\"}], \n",
" api_base=\"http://localhost:11434\",\n",
" stream=True\n",
")\n",
"print(response)\n",
"for chunk in response:\n",
" print(chunk['choices'][0]['delta'])\n"
]
},
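{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"The deltas above can be stitched back together into the full reply. A minimal sketch, assuming each chunk's `delta` is a plain dict with an optional `content` key, as in the output above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from litellm import completion\n",
"\n",
"response = completion(\n",
"    model=\"ollama/llama2\",\n",
"    messages=[{\"content\": \"respond in 20 words. who are you?\", \"role\": \"user\"}],\n",
"    api_base=\"http://localhost:11434\",\n",
"    stream=True\n",
")\n",
"\n",
"# accumulate the streamed deltas into one string\n",
"full_text = \"\"\n",
"for chunk in response:\n",
"    delta = chunk['choices'][0]['delta']\n",
"    full_text += delta.get('content') or ''  # some chunks may omit content\n",
"print(full_text)"
]
},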
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Call Ollama - Llama2 with Acompletion + Streaming"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: async_generator in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (1.10)\n"
]
}
],
"source": [
"# litellm uses async_generator for ollama async streaming, ensure it's installed\n",
"!pip install async_generator"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'choices': [{'delta': {'role': 'assistant', 'content': ' I'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': \"'\"}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'm'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' just'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' an'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' A'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'I'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' I'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' don'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': \"'\"}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 't'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' have'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' access'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' real'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '-'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'time'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' weather'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' information'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' or'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' current'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' conditions'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' your'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' specific'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' location'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' живело'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' can'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' provide'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' with'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' weather'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' forec'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'asts'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' information'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' for'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' your'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' location'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' if'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' would'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' like'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Please'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' let'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' me'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' know'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' where'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' are'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' located'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' I'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' will'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' do'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' my'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' best'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' assist'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"None\n"
]
}
],
"source": [
"import litellm\n",
"\n",
"async def async_ollama():\n",
" response = await litellm.acompletion(\n",
" model=\"ollama/llama2\", \n",
" messages=[{ \"content\": \"what's the weather\" ,\"role\": \"user\"}], \n",
" api_base=\"http://localhost:11434\", \n",
" stream=True\n",
" )\n",
" async for chunk in response:\n",
" print(chunk)\n",
"\n",
"result = await async_ollama()\n",
"print(result)\n",
"\n",
"try:\n",
" async for chunk in result:\n",
" print(chunk)\n",
"except TypeError: # the last chunk is None from Ollama, this raises an error with async streaming\n",
" pass"
]
},
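{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"`acompletion` also works without streaming. A minimal sketch, assuming the response supports dict-style access as in the output below; the helper name `async_ollama_no_stream` is ours:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import litellm\n",
"\n",
"async def async_ollama_no_stream():\n",
"    # without stream=True, acompletion resolves to the full response object\n",
"    return await litellm.acompletion(\n",
"        model=\"ollama/llama2\",\n",
"        messages=[{\"content\": \"respond in 20 words. who are you?\", \"role\": \"user\"}],\n",
"        api_base=\"http://localhost:11434\"\n",
"    )\n",
"\n",
"response = await async_ollama_no_stream()\n",
"print(response['choices'][0]['message']['content'])"
]
},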
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Completion Call"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"object\": \"chat.completion\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" I'm LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner.\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"id\": \"chatcmpl-ea7b8242-791f-4656-ba12-e098edeb960e\",\n",
" \"created\": 1695324686.6696231,\n",
" \"response_ms\": 4072.3050000000003,\n",
" \"model\": \"ollama/llama2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 10,\n",
" \"completion_tokens\": 27,\n",
" \"total_tokens\": 37\n",
" }\n",
"}\n"
]
}
],
"source": [
"from litellm import completion\n",
"\n",
"response = completion(\n",
" model=\"ollama/llama2\", \n",
" messages=[{ \"content\": \"respond in 20 words. who are you?\",\"role\": \"user\"}], \n",
" api_base=\"http://localhost:11434\"\n",
")\n",
"print(response)\n"
]
},
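{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of pulling individual fields out of the response printed above, assuming dict-style access (keys taken from the output shown):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(response['choices'][0]['message']['content'])  # the assistant's reply\n",
"print(response['model'])  # which model served the request\n",
"print(response['usage'])  # prompt/completion/total token counts"
]
},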
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}