{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install litellm # version 0.1.724 or higher " ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Call Ollama - llama2 with Streaming" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "{'role': 'assistant', 'content': ' I'}\n", "{'role': 'assistant', 'content': \"'\"}\n", "{'role': 'assistant', 'content': 'm'}\n", "{'role': 'assistant', 'content': ' L'}\n", "{'role': 'assistant', 'content': 'La'}\n", "{'role': 'assistant', 'content': 'MA'}\n", "{'role': 'assistant', 'content': ','}\n", "{'role': 'assistant', 'content': ' an'}\n", "{'role': 'assistant', 'content': ' A'}\n", "{'role': 'assistant', 'content': 'I'}\n", "{'role': 'assistant', 'content': ' assistant'}\n", "{'role': 'assistant', 'content': ' developed'}\n", "{'role': 'assistant', 'content': ' by'}\n", "{'role': 'assistant', 'content': ' Meta'}\n", "{'role': 'assistant', 'content': ' A'}\n", "{'role': 'assistant', 'content': 'I'}\n", "{'role': 'assistant', 'content': ' that'}\n", "{'role': 'assistant', 'content': ' can'}\n", "{'role': 'assistant', 'content': ' understand'}\n", "{'role': 'assistant', 'content': ' and'}\n", "{'role': 'assistant', 'content': ' respond'}\n", "{'role': 'assistant', 'content': ' to'}\n", "{'role': 'assistant', 'content': ' human'}\n", "{'role': 'assistant', 'content': ' input'}\n", "{'role': 'assistant', 'content': ' in'}\n", "{'role': 'assistant', 'content': ' a'}\n", "{'role': 'assistant', 'content': ' convers'}\n", "{'role': 'assistant', 'content': 'ational'}\n", "{'role': 'assistant', 'content': ' manner'}\n", "{'role': 'assistant', 'content': '.'}\n" ] } ], "source": [ "from litellm import completion\n", "\n", "response = completion(\n", " model=\"ollama/llama2\", \n", " messages=[{ \"content\": \"respond in 20 words. 
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Call Ollama - Llama2 with acompletion + Streaming" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", "Requirement already satisfied: async_generator in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (1.10)\n" ] } ], "source": [ "# litellm uses async_generator for Ollama async streaming; make sure it is installed\n", "!pip install async_generator" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'choices': [{'delta': {'role': 'assistant', 'content': ' I'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': \"'\"}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': 'm'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' just'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' an'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' A'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': 'I'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' I'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' don'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': \"'\"}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': 't'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' have'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' access'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' real'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': '-'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': 'time'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' weather'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' information'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' or'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' current'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' conditions'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' your'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' specific'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' location'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' живело'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' can'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' provide'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' with'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' weather'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' forec'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': 'asts'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' information'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' for'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' your'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' location'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' if'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' would'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' like'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' Please'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' let'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' me'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' know'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' where'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' are'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' located'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' I'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' will'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' do'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' my'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' best'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' assist'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n", "{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n", "None\n" ] } ], "source": [ "import litellm\n", "\n", "async def async_ollama():\n", "    response = await litellm.acompletion(\n", "        model=\"ollama/llama2\",\n", "        messages=[{\"content\": \"what's the weather\", \"role\": \"user\"}],\n", "        api_base=\"http://localhost:11434\",\n", "        stream=True\n", "    )\n", "    # print each chunk as it streams in\n", "    async for chunk in response:\n", "        print(chunk)\n", "\n", "# Jupyter supports top-level await; async_ollama returns nothing, so this prints None\n", "result = await async_ollama()\n", "print(result)" ] },
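{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "The cell above relies on Jupyter's top-level `await`. A minimal sketch of driving the same coroutine from a plain Python script, assuming the same local Ollama server:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import asyncio\n", "\n", "import litellm\n", "\n", "async def main():\n", "    response = await litellm.acompletion(\n", "        model=\"ollama/llama2\",\n", "        messages=[{\"content\": \"what's the weather\", \"role\": \"user\"}],\n", "        api_base=\"http://localhost:11434\",\n", "        stream=True\n", "    )\n", "    async for chunk in response:\n", "        if chunk is not None:  # skip the trailing None chunk from Ollama\n", "            print(chunk)\n", "\n", "# in a script (not inside Jupyter, which already runs an event loop):\n", "# asyncio.run(main())" ] },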
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Completion Call" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", "  \"object\": \"chat.completion\",\n", "  \"choices\": [\n", "    {\n", "      \"finish_reason\": \"stop\",\n", "      \"index\": 0,\n", "      \"message\": {\n", "        \"content\": \" I'm LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner.\",\n", "        \"role\": \"assistant\",\n", "        \"logprobs\": null\n", "      }\n", "    }\n", "  ],\n", "  \"id\": \"chatcmpl-ea7b8242-791f-4656-ba12-e098edeb960e\",\n", "  \"created\": 1695324686.6696231,\n", "  \"response_ms\": 4072.3050000000003,\n", "  \"model\": \"ollama/llama2\",\n", "  \"usage\": {\n", "    \"prompt_tokens\": 10,\n", "    \"completion_tokens\": 27,\n", "    \"total_tokens\": 37\n", "  }\n", "}\n" ] } ], "source": [ "from litellm import completion\n", "\n", "response = completion(\n", "    model=\"ollama/llama2\",\n", "    messages=[{\"content\": \"respond in 20 words. who are you?\", \"role\": \"user\"}],\n", "    api_base=\"http://localhost:11434\"  # no stream=True: returns one complete response\n", ")\n", "print(response)\n" ] },
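{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "A quick sketch of reading individual fields off the response above; the printed output confirms litellm returns the OpenAI-style shape with `choices`, `message`, and `usage`." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# assumes `response` from the cell above\n", "print(response[\"choices\"][0][\"message\"][\"content\"])\n", "print(response[\"usage\"])  # prompt_tokens / completion_tokens / total_tokens" ] },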
\"prompt_tokens\": 10,\n", " \"completion_tokens\": 27,\n", " \"total_tokens\": 37\n", " }\n", "}\n" ] } ], "source": [ "from litellm import completion\n", "\n", "response = completion(\n", " model=\"ollama/llama2\", \n", " messages=[{ \"content\": \"respond in 20 words. who are you?\",\"role\": \"user\"}], \n", " api_base=\"http://localhost:11434\"\n", ")\n", "print(response)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.6" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }