Merge branch 'main' of github.com:vincelwt/litellm

Vince Lwt 2023-08-21 12:22:07 +02:00
commit 22c7e38de5
94 changed files with 5691 additions and 2812 deletions


@@ -8,6 +8,16 @@ jobs:
steps:
- checkout
- run:
name: Check if litellm dir was updated or if pyproject.toml was modified
command: |
if [ -n "$(git diff --name-only $CIRCLE_SHA1^..$CIRCLE_SHA1 | grep -E 'pyproject\.toml|litellm/')" ]; then
echo "litellm updated"
else
echo "No changes to litellm or pyproject.toml. Skipping tests."
circleci step halt
fi
- run:
name: Install Dependencies
command: |
@@ -15,8 +25,20 @@ jobs:
python -m pip install -r .circleci/requirements.txt
pip install infisical
pip install pytest
pip install mypy
pip install openai[datalib]
pip install -Uq chromadb==0.3.29
- run:
name: Linting Testing
command: |
cd litellm
python -m pip install types-requests types-setuptools
if ! python -m mypy . --ignore-missing-imports; then
echo "mypy detected errors"
exit 1
fi
cd ..
# Run pytest and generate JUnit XML report
@@ -77,7 +99,3 @@ workflows:
- publish_to_pypi:
requires:
- local_testing
filters:
branches:
only:
- main


@@ -1,6 +1,6 @@
# *🚅 litellm*
[![PyPI Version](https://img.shields.io/pypi/v/litellm.svg)](https://pypi.org/project/litellm/)
[![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.345-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/)
[![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.424-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/)
[![CircleCI](https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main)
![Downloads](https://img.shields.io/pypi/dm/litellm)
[![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere%7CReplicate%7CHugging%20Face-blue?color=green)](https://github.com/BerriAI/litellm)
@@ -35,13 +35,13 @@ messages = [{ "content": "Hello, how are you?","role": "user"}]
response = completion(model="gpt-3.5-turbo", messages=messages)
# cohere call
response = completion("command-nightly", messages)
response = completion(model="command-nightly", messages=messages)
```
Code Sample: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
Stable version
```
pip install litellm==0.1.345
pip install litellm==0.1.424
```
## Streaming Queries
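The hunk ends at this heading, so the streaming example itself isn't shown here; as a minimal sketch of the pattern used elsewhere in this commit (`stream=True` plus per-chunk `delta` access, as in the notebook changes below), streaming looks roughly like:
```python
from litellm import completion

# assumes OPENAI_API_KEY is set in the environment, as in the Quick Start above
messages = [{"content": "Hello, how are you?", "role": "user"}]

# With stream=True, completion returns an iterable of chunks instead of one response
response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
for chunk in response:
    print(chunk["choices"][0]["delta"])  # incremental content, OpenAI-style
```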

File diff suppressed because one or more lines are too long


@@ -19,12 +19,12 @@
},
"outputs": [],
"source": [
"!pip install litellm==0.1.371"
"!pip install litellm==0.1.419"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"metadata": {
"id": "TMI3739_9q97"
},
@@ -32,7 +32,7 @@
"source": [
"import os\n",
"from litellm import completion\n",
"os.environ[\"TOGETHER_AI_TOKEN\"] = \"\" #@param\n",
"os.environ[\"TOGETHERAI_API_KEY\"] = \"\" #@param\n",
"user_message = \"Hello, whats the weather in San Francisco??\"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]"
]
@@ -50,26 +50,47 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Jrrt8puj523f",
"outputId": "5a5b5beb-cda3-413e-8e83-4423d392cb44"
"outputId": "24494dea-816f-47a6-ade4-1b04f2e9085b"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': \"\\n\\nI'm not able to provide real-time weather information. However, I can suggest\"}}], 'created': 1691629657.9288375, 'model': 'togethercomputer/llama-2-70b-chat', 'usage': {'prompt_tokens': 9, 'completion_tokens': 17, 'total_tokens': 26}}\n"
"{\n",
" 'choices': [\n",
"{\n",
" 'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {\n",
" 'role': 'assistant',\n",
" 'content': \"\n",
"\n",
"I'm not able to provide real-time weather information. However, I can suggest some ways for you to find out the current weather in San Francisco.\n",
"\n",
"1. Check online weather websites: There are many websites that provide up-to-date weather information, such as AccuWeather, Weather.com, or the National Weather Service. You can enter \"San Francisco\" in the search bar and get the current weather conditions, forecast, and radar imagery.\n",
"2. Use a weather app: You can download a weather app on your smartphone that provides real-time weather information. Some popular weather apps include Dark Sky, Weather Underground, and The Weather Channel.\n",
"3. Tune into local news: You can watch local news channels or listen to local radio stations to get the latest weather forecast and current conditions.\n",
"4. Check social media: Follow local weather accounts on social media platforms like Twitter or Facebook to\"\n",
"}\n",
"}\n",
" ],\n",
" 'created': 1692323365.8261144,\n",
" 'model': 'togethercomputer/llama-2-70b-chat',\n",
" 'usage': {'prompt_tokens': 9, 'completion_tokens': 176, 'total_tokens': 185}\n",
"}\n"
]
}
],
"source": [
"model_name = \"togethercomputer/llama-2-70b-chat\"\n",
"response = completion(model=model_name, messages=messages, custom_llm_provider=\"together_ai\")\n",
"response = completion(model=model_name, messages=messages, max_tokens=200)\n",
"print(response)"
]
},
@@ -85,46 +106,569 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wuBhlZtC6MH5",
"outputId": "fcb82177-6494-4963-8e37-8716d3b9e616"
"outputId": "1bedc981-4ab1-4abd-9b81-a9727223b66a"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<litellm.utils.CustomStreamWrapper object at 0x7ad005e93ee0>\n",
"{'role': 'assistant', 'content': '\\\\n'}\n",
"{'role': 'assistant', 'content': '\\\\n'}\n",
"{'role': 'assistant', 'content': 'I'}\n",
"{'role': 'assistant', 'content': 'm'}\n",
"{'role': 'assistant', 'content': ' not'}\n",
"{'role': 'assistant', 'content': ' able'}\n",
"{'role': 'assistant', 'content': ' to'}\n",
"{'role': 'assistant', 'content': ' provide'}\n",
"{'role': 'assistant', 'content': ' real'}\n",
"{'role': 'assistant', 'content': '-'}\n",
"{'role': 'assistant', 'content': 'time'}\n",
"{'role': 'assistant', 'content': ' weather'}\n",
"{'role': 'assistant', 'content': ' information'}\n",
"{'role': 'assistant', 'content': '.'}\n",
"{'role': 'assistant', 'content': ' However'}\n",
"{'role': 'assistant', 'content': ','}\n",
"{'role': 'assistant', 'content': ' I'}\n",
"{'role': 'assistant', 'content': ' can'}\n"
"<async_generator object together_ai_completion_streaming at 0x7d39eeae81c0>\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Com'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'bin'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ator'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' ('}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ')'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' are'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' two'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' popular'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' startup'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' acceler'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ators'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' that'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' have'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' gained'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' recognition'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' for'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' their'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' effect'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'iveness'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'urt'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'uring'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' scaling'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' early'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '-'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'stage'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' companies'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ities'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' they'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' also'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' have'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' distinct'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' differences'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' that'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' set'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' them'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' apart'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' In'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' this'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' ess'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ay'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' we'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' will'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' explore'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' key'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' features'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' discuss'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' which'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' program'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' might'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' be'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' better'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fit'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' for'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' your'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' startup'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Com'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'bin'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ator'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' one'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' most'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' successful'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' startup'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' acceler'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ators'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' world'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' with'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' port'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'folio'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' that'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' includes'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Air'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'b'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'nb'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Drop'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'box'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Red'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'dit'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' F'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ounded'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' '}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '2'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '5'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' has'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fund'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ed'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' over'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' '}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '1'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '9'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' start'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ups'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' with'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' combined'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' valu'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ation'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' over'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' $'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '1'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' billion'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' The'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' program'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' known'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' for'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' its'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' inten'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'se'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' three'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '-'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'month'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' boot'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' camp'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '-'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'style'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' format'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' where'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' found'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ers'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' work'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' closely'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' with'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' experienced'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' ment'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ors'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' develop'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' their'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' products'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' ref'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ine'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' their'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' business'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' models'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' prepare'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' for'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fund'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ra'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ising'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': \"'\"}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 's'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' focus'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' on'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' software'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' technology'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' internet'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' start'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ups'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' program'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' has'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' strong'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' track'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' record'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' ident'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ifying'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'urt'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'uring'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' successful'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' companies'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' these'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' spaces'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' on'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' other'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' hand'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' relatively'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' new'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' acceler'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ator'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' program'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' that'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' was'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' founded'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' '}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '2'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '1'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '7'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' While'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' it'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' may'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' not'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' have'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' same'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' level'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' brand'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' recognition'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' as'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' has'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' quickly'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' gained'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' reputation'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' for'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' its'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' unique'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' approach'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' startup'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' acceleration'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' The'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' program'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' focus'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'es'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' on'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' supporting'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' under'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 're'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'present'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ed'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' found'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ers'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' particularly'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' women'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' people'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' color'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' provides'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' range'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' resources'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' support'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' help'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' these'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' found'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ers'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' succeed'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': \"'\"}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 's'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' program'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' designed'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' be'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' more'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' flexible'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' personal'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ized'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' than'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' traditional'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' acceler'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ators'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' with'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' focus'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' on'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' connecting'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' found'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ers'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' with'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' ment'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ors'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' resources'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' that'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' are'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' tail'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ored'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' their'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' specific'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' needs'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'One'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' key'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' difference'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' between'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' type'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' companies'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' they'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' support'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' focus'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'es'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' primarily'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' on'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' software'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' technology'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' internet'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' start'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ups'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' while'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' has'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' bro'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ader'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' focus'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' that'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' includes'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' range'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' indust'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ries'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' such'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' as'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' health'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'care'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fin'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ance'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' consumer'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' products'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' This'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' means'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' that'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' if'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' your'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' startup'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' non'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '-'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'tech'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' industry'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' may'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' be'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' better'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fit'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'An'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'other'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' difference'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' between'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' two'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' programs'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' their'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' approach'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fund'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ing'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' provides'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' seed'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fund'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ing'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' all'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' its'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' port'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'folio'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' companies'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' typically'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' range'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' $'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '1'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' $'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '2'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' In'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' contrast'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' does'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' not'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' provide'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fund'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ing'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n"
]
}
],
"source": [
"response = completion(model=model_name, messages=messages, stream=True, custom_llm_provider=\"together_ai\")\n",
"print(response)\n",
"for chunk in response:\n",
" print(chunk['choices'][0]['delta']) # same as openai format"
"user_message = \"Write 1page essay on YC + liteLLM\"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
"\n",
"\n",
"import asyncio\n",
"async def parse_stream(stream):\n",
" async for elem in stream:\n",
" print(elem)\n",
" return\n",
"\n",
"stream = completion(model=\"togethercomputer/llama-2-70b-chat\", messages=messages, stream=True, max_tokens=800)\n",
"print(stream)\n",
"\n",
"# Await the asynchronous function directly in the notebook cell\n",
"await parse_stream(stream)\n"
]
}
],


@@ -0,0 +1,201 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Langchain liteLLM Demo Notebook\n",
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
"Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n",
"\n",
"Call all LLM models using the same I/O interface\n",
"\n",
"Example usage\n",
"```python\n",
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"ChatLiteLLM(model=\"command-nightly\")\n",
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"```"
],
"metadata": {
"id": "5hwntUxTMxEk"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aPNAUsCvB6Sv"
},
"outputs": [],
"source": [
"!pip install litellm langchain"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" AIMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
")\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage"
],
"metadata": {
"id": "MOhRaVnhB-0J"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"execution_count": 27,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 27
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"execution_count": 30,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 30
}
]
}
]
}


@@ -8,6 +8,8 @@
[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/DYqQAW?referralCode=t3ukrU)
![4BC6491E-86D0-4833-B061-9F54524B2579](https://github.com/BerriAI/litellm/assets/17561003/f5dd237b-db5e-42e1-b1ac-f05683b1d724)
## What does liteLLM proxy do
- Make `/chat/completions` requests for 50+ LLM models **Azure, OpenAI, Replicate, Anthropic, Hugging Face**
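As a rough illustration of that request shape (the base URL and port below are assumptions for a locally running proxy; the response layout follows the OpenAI format described elsewhere in this repo), a client call could look like:
```python
import requests

PROXY_BASE_URL = "http://localhost:8000"  # hypothetical local proxy address

payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hello, how are you?"}],
}

# The proxy exposes an OpenAI-style /chat/completions endpoint
resp = requests.post(f"{PROXY_BASE_URL}/chat/completions", json=payload)
print(resp.json()["choices"][0]["message"]["content"])
```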
@@ -156,3 +158,11 @@ This project includes a `Dockerfile` allowing you to build and deploy a Docker P
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
## Roadmap
- [ ] Support hosted db (e.g. Supabase)
- [ ] Easily send data to places like posthog and sentry.
- [ ] Add a hot-cache for project spend logs - enables fast checks for user + project limits
- [ ] Implement user-based rate-limiting
- [ ] Spending controls per project - expose key creation endpoint
- [ ] Need to store a keys db -> mapping created keys to their alias (i.e. project name)
- [ ] Easily add new models as backups / as the entry-point (add this to the available model list)

BIN dist/litellm-0.1.401-py3-none-any.whl vendored Normal file
BIN dist/litellm-0.1.401.tar.gz vendored Normal file
BIN dist/litellm-0.1.432-py3-none-any.whl vendored Normal file
BIN dist/litellm-0.1.432.tar.gz vendored Normal file
BIN dist/litellm-0.1.434-py3-none-any.whl vendored Normal file
BIN dist/litellm-0.1.434.tar.gz vendored Normal file
BIN dist/litellm-0.1.435-py3-none-any.whl vendored Normal file
BIN dist/litellm-0.1.435.tar.gz vendored Normal file


@@ -1,12 +0,0 @@
---
slug: first-blog-post
title: First Blog Post
authors:
name: Gao Wei
title: Docusaurus Core Team
url: https://github.com/wgao19
image_url: https://github.com/wgao19.png
tags: [hola, docusaurus]
---
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet


@@ -1,44 +0,0 @@
---
slug: long-blog-post
title: Long Blog Post
authors: endi
tags: [hello, docusaurus]
---
This is the summary of a very long blog post,
Use a `<!--` `truncate` `-->` comment to limit blog post size in the list view.
<!--truncate-->
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

View file

@ -1,20 +0,0 @@
---
slug: mdx-blog-post
title: MDX Blog Post
authors: [slorber]
tags: [docusaurus]
---
Blog posts support [Docusaurus Markdown features](https://docusaurus.io/docs/markdown-features), such as [MDX](https://mdxjs.com/).
:::tip
Use the power of React to create interactive blog posts.
```js
<button onClick={() => alert('button clicked!')}>Click me!</button>
```
<button onClick={() => alert('button clicked!')}>Click me!</button>
:::

Binary file not shown.

Image removed (94 KiB).

View file

@ -1,25 +1,43 @@
---
slug: welcome
title: Welcome
authors: [slorber, yangshun]
tags: [facebook, hello, docusaurus]
---
# 🚅 litellm
a light 100 line package to simplify calling OpenAI, Azure, Cohere, Anthropic APIs
[Docusaurus blogging features](https://docusaurus.io/docs/blog) are powered by the [blog plugin](https://docusaurus.io/docs/api/plugins/@docusaurus/plugin-content-blog).
###### litellm manages:
* Calling all LLM APIs using the OpenAI format - `completion(model, messages)`
* Consistent output for all LLM APIs, text responses will always be available at `['choices'][0]['message']['content']`
* Consistent Exceptions for all LLM APIs, we map RateLimit, Context Window, and Authentication Error exceptions across all providers to their OpenAI equivalents. [see Code](https://github.com/BerriAI/litellm/blob/ba1079ff6698ef238c5c7f771dd2b698ec76f8d9/litellm/utils.py#L250)
Simply add Markdown files (or folders) to the `blog` directory.
###### observability:
* Logging - see exactly what the raw model request/response is by plugging in your own function `completion(.., logger_fn=your_logging_fn)` and/or print statements from the package `litellm.set_verbose=True`
* Callbacks - automatically send your data to Helicone, Sentry, Posthog, Slack - `litellm.success_callbacks`, `litellm.failure_callbacks` [see Callbacks](https://litellm.readthedocs.io/en/latest/advanced/)
Regular blog authors can be added to `authors.yml`.
## Quick Start
Go directly to code: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
### Installation
```
pip install litellm
```
The blog post date can be extracted from filenames, such as:
### Usage
```python
from litellm import completion
- `2019-05-30-welcome.md`
- `2019-05-30-welcome/index.md`
## set ENV variables
os.environ["OPENAI_API_KEY"] = "openai key"
os.environ["COHERE_API_KEY"] = "cohere key"
A blog post folder can be convenient to co-locate blog post images:
messages = [{ "content": "Hello, how are you?","role": "user"}]
![Docusaurus Plushie](./docusaurus-plushie-banner.jpeg)
# openai call
response = completion(model="gpt-3.5-turbo", messages=messages)
The blog supports tags as well!
# cohere call
response = completion("command-nightly", messages)
```
Need Help / Support : [see troubleshooting](https://litellm.readthedocs.io/en/latest/troubleshoot)
**And if you don't want a blog**: just delete this directory, and use `blog: false` in your Docusaurus config.
## Why did we build liteLLM
- **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI, Cohere
## Support
* [Meet with us 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
* Contact us at ishaan@berri.ai / krrish@berri.ai

View file

@ -1,17 +0,0 @@
endi:
name: Endilie Yacop Sucipto
title: Maintainer of Docusaurus
url: https://github.com/endiliey
image_url: https://github.com/endiliey.png
yangshun:
name: Yangshun Tay
title: Front End Engineer @ Facebook
url: https://github.com/yangshun
image_url: https://github.com/yangshun.png
slorber:
name: Sébastien Lorber
title: Docusaurus maintainer
url: https://sebastienlorber.com
image_url: https://github.com/slorber.png

View file

@ -0,0 +1,42 @@
# Caching Completion() Responses
liteLLM implements exact match caching. It can be enabled by setting:
1. `litellm.caching`: When set to `True`, enables caching for all responses. Keys are the input `messages` and the value stored in the cache is the corresponding `response`
2. `litellm.caching_with_models`: When set to `True`, enables caching on a per-model basis. Keys are the input `messages + model` and the value stored in the cache is the corresponding `response`
## Usage
1. Caching - caching
Keys in the cache are the input `messages`; the following example will lead to a cache hit
```python
litellm.caching = True
# Make completion calls
response1 = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Tell me a joke."}])
response2 = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Tell me a joke."}])
# response1 == response2, response 1 is cached
# with a diff model
response3 = completion(model="command-nightly", messages=[{"role": "user", "content": "Tell me a joke."}])
# response3 == response1 == response2, since keys are messages
```
2. Caching with Models - caching_with_models
Keys in the cache are `messages + model`; the following example will not lead to a cache hit
```python
litellm.caching_with_models = True
# Make completion calls
response1 = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Tell me a joke."}])
response2 = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Tell me a joke."}])
# response1 == response2, response 1 is cached
# with a diff model, this will call the API since the key is not cached
response3 = completion(model="command-nightly", messages=[{"role": "user", "content": "Tell me a joke."}])
# response3 != response1, since keys are messages + model
```

View file

@ -1,6 +1,6 @@
# Completion Function - completion()
# Input Format - completion()
The Input params are **exactly the same** as the
<a href="https://platform.openai.com/docs/api-reference/chat/create" target="_blank" rel="noopener noreferrer">OpenAI Create chat completion</a>, and let you call **Azure OpenAI, Anthropic, Cohere, Replicate, OpenRouter** models in the same format.
<a href="https://platform.openai.com/docs/api-reference/chat/create" target="_blank" rel="noopener noreferrer">OpenAI Create chat completion</a>, and let you call Azure OpenAI, Anthropic, Cohere, Replicate, OpenRouter models in the same format.
In addition, liteLLM allows you to pass in the following **Optional** liteLLM args:
`force_timeout`, `azure`, `logger_fn`, `verbose`
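A minimal sketch of passing these optional args (assumes `OPENAI_API_KEY` is set; the timeout value and logger function are illustrative):
```python
from litellm import completion

def my_logger(model_call_dict):
    # liteLLM passes the raw model request/response details to this function
    print(model_call_dict)

messages = [{"role": "user", "content": "Hello, how are you?"}]

response = completion(
    model="gpt-3.5-turbo",
    messages=messages,
    force_timeout=60,     # seconds to wait before giving up on the provider call
    logger_fn=my_logger,  # plug in your own logging function
)
```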

View file

@ -1,12 +1,50 @@
# Completion Function - completion()
Here's the exact json output you can expect from a litellm `completion` call:
# Output Format - completion()
Here's the exact json output and type you can expect from all litellm `completion` calls for all models
```python
{'choices': [{'finish_reason': 'stop',
'index': 0,
'message': {'role': 'assistant',
'content': " I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."}}],
{
'choices': [
{
'finish_reason': str, # String: 'stop'
'index': int, # Integer: 0
'message': { # Dictionary [str, str]
'role': str, # String: 'assistant'
'content': str # String: "default message"
}
}
],
'created': float, # Float: timestamp, e.g. 1691429984.38
'model': str, # String: model name, e.g. 'claude-instant-1'
'usage': { # Dictionary [str, int]
'prompt_tokens': int, # Integer
'completion_tokens': int, # Integer
'total_tokens': int # Integer
}
}
```
You can access the response as a dictionary or as a class object, just as the OpenAI SDK allows you to
```python
print(response.choices[0].message.content)
print(response['choices'][0]['message']['content'])
```
Here's what an example response looks like
```python
{
'choices': [
{
'finish_reason': 'stop',
'index': 0,
'message': {
'role': 'assistant',
'content': " I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."
}
}
],
'created': 1691429984.3852863,
'model': 'claude-instant-1',
'usage': {'prompt_tokens': 18, 'completion_tokens': 23, 'total_tokens': 41}}
'usage': {'prompt_tokens': 18, 'completion_tokens': 23, 'total_tokens': 41}
}
```

View file

@ -1,4 +1,12 @@
# Generation/Completion/Chat Completion Models
# Supported Chat, Completion Models
## API Keys
liteLLM reads provider keys from environment variables; all keys should be named in the following format:
`<PROVIDER>_API_KEY`, for example (a short sketch follows this list)
* `OPENAI_API_KEY` Provider = OpenAI
* `TOGETHERAI_API_KEY` Provider = TogetherAI
* `HUGGINGFACE_API_KEY` Provider = HuggingFace
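A minimal sketch of setting provider keys via environment variables (the key values below are placeholders):
```python
import os

# placeholder values; use your real provider keys
os.environ["OPENAI_API_KEY"] = "sk-..."        # Provider = OpenAI
os.environ["TOGETHERAI_API_KEY"] = "..."       # Provider = TogetherAI
os.environ["HUGGINGFACE_API_KEY"] = "hf_..."   # Provider = HuggingFace
```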
### OpenAI Chat Completion Models
@ -49,6 +57,7 @@ VertexAI requires you to set `application_default_credentials.json`, this can be
| Model Name | Function Call | Required OS Variables |
|------------------|--------------------------------------------|--------------------------------------|
| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
| claude-instant-1.2 | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
### Hugging Face Inference API
@ -64,10 +73,10 @@ Here are some examples of supported models:
| Model Name | Function Call | Required OS Variables |
|------------------|-------------------------------------------------------------------------------------|--------------------------------------|
| [stabilityai/stablecode-completion-alpha-3b-4k](https://huggingface.co/stabilityai/stablecode-completion-alpha-3b-4k) | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
| [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) | `completion(model="bigcode/starcoder", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
| [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) | `completion(model="google/flan-t5-xxl", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
| [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) | `completion(model="google/flan-t5-large", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
| [stabilityai/stablecode-completion-alpha-3b-4k](https://huggingface.co/stabilityai/stablecode-completion-alpha-3b-4k) | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
| [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) | `completion(model="bigcode/starcoder", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
| [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) | `completion(model="google/flan-t5-xxl", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
| [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) | `completion(model="google/flan-t5-large", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
### AI21 Models
| Model Name | Function Call | Required OS Variables |
@ -82,9 +91,24 @@ Here are some examples of supported models:
|------------------|--------------------------------------------|--------------------------------------|
| command-nightly | `completion('command-nightly', messages)` | `os.environ['COHERE_API_KEY']` |
### BaseTen Models
### Together AI Models
liteLLM supports `non-streaming` and `streaming` requests to all models on https://api.together.xyz/
Example TogetherAI Usage - Note: liteLLM supports all models deployed on TogetherAI; a usage sketch follows the table below
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| togethercomputer/llama-2-70b-chat | `completion('togethercomputer/llama-2-70b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/LLaMA-2-13b-chat | `completion('togethercomputer/LLaMA-2-13b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/code-and-talk-v1 | `completion('togethercomputer/code-and-talk-v1', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/creative-v1 | `completion('togethercomputer/creative-v1', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/yourmodel | `completion('togethercomputer/yourmodel', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
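A minimal usage sketch, assuming `TOGETHERAI_API_KEY` is set (the key value below is a placeholder):
```python
import os
from litellm import completion

os.environ["TOGETHERAI_API_KEY"] = "..."  # placeholder; use your TogetherAI key

messages = [{"content": "Hello, whats the weather in San Francisco??", "role": "user"}]

# non-streaming call
response = completion(model="togethercomputer/llama-2-70b-chat", messages=messages)
print(response["choices"][0]["message"]["content"])

# streaming call
for chunk in completion(model="togethercomputer/llama-2-70b-chat", messages=messages, stream=True):
    print(chunk["choices"][0]["delta"])
```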
### Baseten Models
Baseten provides infrastructure to deploy and serve ML models https://www.baseten.co/. Use liteLLM to easily call models deployed on Baseten.
Example Baseten Usage - Note: liteLLM supports all models deployed on Baseten
| Model Name | Function Call | Required OS Variables |
|------------------|--------------------------------------------|------------------------------------|
@ -99,13 +123,37 @@ All the text models from [OpenRouter](https://openrouter.ai/docs) are supported
| Model Name | Function Call | Required OS Variables |
|------------------|--------------------------------------------|--------------------------------------|
| openai/gpt-3.5-turbo | `completion('openai/gpt-3.5-turbo', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| openai/gpt-3.5-turbo-16k | `completion('openai/gpt-3.5-turbo-16k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| openai/gpt-4 | `completion('openai/gpt-4', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| openai/gpt-4-32k | `completion('openai/gpt-4-32k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| anthropic/claude-2 | `completion('anthropic/claude-2', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| anthropic/claude-instant-v1 | `completion('anthropic/claude-instant-v1', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| google/palm-2-chat-bison | `completion('google/palm-2-chat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| google/palm-2-codechat-bison | `completion('google/palm-2-codechat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| meta-llama/llama-2-13b-chat | `completion('meta-llama/llama-2-13b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| meta-llama/llama-2-70b-chat | `completion('meta-llama/llama-2-70b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| openai/gpt-3.5-turbo | `completion('openai/gpt-3.5-turbo', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| openai/gpt-3.5-turbo-16k | `completion('openai/gpt-3.5-turbo-16k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| openai/gpt-4 | `completion('openai/gpt-4', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| openai/gpt-4-32k | `completion('openai/gpt-4-32k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| anthropic/claude-2 | `completion('anthropic/claude-2', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| anthropic/claude-instant-v1 | `completion('anthropic/claude-instant-v1', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| google/palm-2-chat-bison | `completion('google/palm-2-chat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| google/palm-2-codechat-bison | `completion('google/palm-2-codechat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| meta-llama/llama-2-13b-chat | `completion('meta-llama/llama-2-13b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| meta-llama/llama-2-70b-chat | `completion('meta-llama/llama-2-70b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
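A minimal sketch for an OpenRouter call, assuming the three environment variables from the table above are set (values below are placeholders):
```python
import os
from litellm import completion

# placeholder values for the required OpenRouter variables
os.environ["OR_SITE_URL"] = "https://example.com"
os.environ["OR_APP_NAME"] = "my-app"
os.environ["OPENROUTER_API_KEY"] = "..."

messages = [{"content": "Hello, how are you?", "role": "user"}]
response = completion(model="openai/gpt-3.5-turbo", messages=messages)
print(response["choices"][0]["message"]["content"])
```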
### Petals Models
Supported models on https://chat.petals.dev/
| Model Name | Function Call | Required OS Variables |
|----------------------|------------------------------------------------------------------------|--------------------------------|
| stabilityai/StableBeluga2 | `completion(model='stabilityai/StableBeluga2', messages, custom_llm_provider="petals")` | No API Key required |
| enoch/llama-65b-hf | `completion(model='enoch/llama-65b-hf', messages, custom_llm_provider="petals")` | No API Key required |
| bigscience/bloomz | `completion(model='bigscience/bloomz', messages, custom_llm_provider="petals")` | No API Key required |
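A minimal sketch for a Petals call; per the table above, no API key is required, though the request runs on the public Petals swarm:
```python
from litellm import completion

messages = [{"content": "Hello, how are you?", "role": "user"}]

# no API key required for Petals models
response = completion(
    model="stabilityai/StableBeluga2",
    messages=messages,
    custom_llm_provider="petals",
)
print(response["choices"][0]["message"]["content"])
```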
### Ollama Models
Ollama supported models: https://github.com/jmorganca/ollama (a streaming usage sketch follows the table below)
| Model Name | Function Call | Required OS Variables |
|----------------------|-----------------------------------------------------------------------------------|--------------------------------|
| Llama2 7B | `completion(model='llama2', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Llama2 13B | `completion(model='llama2:13b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Llama2 70B | `completion(model='llama2:70b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Llama2 Uncensored | `completion(model='llama2-uncensored', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Orca Mini | `completion(model='orca-mini', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Vicuna | `completion(model='vicuna', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Nous-Hermes | `completion(model='nous-hermes', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Nous-Hermes 13B | `completion(model='nous-hermes:13b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Wizard Vicuna Uncensored | `completion(model='wizard-vicuna', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
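A minimal streaming sketch, assuming an Ollama server is already running locally on the default port:
```python
from litellm import completion

messages = [{"content": "Hello, how are you?", "role": "user"}]

# assumes `ollama serve` is running at http://localhost:11434
response = completion(
    model="llama2",
    messages=messages,
    custom_api_base="http://localhost:11434",
    custom_llm_provider="ollama",
    stream=True,
)
for chunk in response:
    print(chunk["choices"][0]["delta"])
```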

View file

@ -1,30 +1,32 @@
# 🚅 litellm
# litellm
a light 100 line package to simplify calling OpenAI, Azure, Cohere, Anthropic APIs
[![PyPI Version](https://img.shields.io/pypi/v/litellm.svg)](https://pypi.org/project/litellm/)
[![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.345-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/)
[![CircleCI](https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main)
![Downloads](https://img.shields.io/pypi/dm/litellm)
[![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere%7CReplicate%7CHugging%20Face-blue?color=green)](https://github.com/BerriAI/litellm)
###### litellm manages:
[![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw)
- Calling all LLM APIs using the OpenAI format - `completion(model, messages)`
- Consistent output for all LLM APIs, text responses will always be available at `['choices'][0]['message']['content']`
- Consistent Exceptions for all LLM APIs, we map RateLimit, Context Window, and Authentication Error exceptions across all providers to their OpenAI equivalents. [see Code](https://github.com/BerriAI/litellm/blob/ba1079ff6698ef238c5c7f771dd2b698ec76f8d9/litellm/utils.py#L250)
a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints. It manages:
###### observability:
- translating inputs to the provider's completion and embedding endpoints
- guarantees [consistent output](https://litellm.readthedocs.io/en/latest/output/), text responses will always be available at `['choices'][0]['message']['content']`
- exception mapping - common exceptions across providers are mapped to the [OpenAI exception types](https://help.openai.com/en/articles/6897213-openai-library-error-types-guidance)
- Logging - see exactly what the raw model request/response is by plugging in your own function `completion(.., logger_fn=your_logging_fn)` and/or print statements from the package `litellm.set_verbose=True`
- Callbacks - automatically send your data to Helicone, LLMonitor, Sentry, Posthog, Slack - `litellm.success_callbacks`, `litellm.failure_callbacks` [see Callbacks](https://litellm.readthedocs.io/en/latest/advanced/)
# usage
## Quick Start
<a href='https://docs.litellm.ai/docs/completion/supported' target="_blank"><img alt='None' src='https://img.shields.io/badge/Supported_LLMs-100000?style=for-the-badge&logo=None&logoColor=000000&labelColor=000000&color=8400EA'/></a>
Go directly to code: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
Demo - https://litellm.ai/playground \
Read the docs - https://docs.litellm.ai/docs/
### Installation
## quick start
```
pip install litellm
```
### Usage
```python
from litellm import completion
@ -41,13 +43,37 @@ response = completion(model="gpt-3.5-turbo", messages=messages)
response = completion("command-nightly", messages)
```
Need Help / Support : [see troubleshooting](https://litellm.readthedocs.io/en/latest/troubleshoot)
Code Sample: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
## Why did we build liteLLM
Stable version
```
pip install litellm==0.1.345
```
## Streaming Queries
liteLLM supports streaming the model response back; pass `stream=True` to get a streaming iterator in the response.
Streaming is supported for OpenAI, Azure, Anthropic, Huggingface models
```python
response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
for chunk in response:
print(chunk['choices'][0]['delta'])
# claude 2
result = completion('claude-2', messages, stream=True)
for chunk in result:
print(chunk['choices'][0]['delta'])
```
# support / talk with founders
- [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
# why did we build this
- **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI, Cohere
## Support
- [Meet with us 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- Contact us at ishaan@berri.ai / krrish@berri.ai

View file

@ -22,11 +22,13 @@ create table
messages json null default '{}'::json,
response json null default '{}'::json,
end_user text null default ''::text,
status text null default ''::text,
error json null default '{}'::json,
response_time real null default '0'::real,
total_cost real null,
additional_details json null default '{}'::json,
constraint request_logs_pkey primary key (id)
litellm_call_id text unique,
primary key (id)
) tablespace pg_default;
```

View file

@ -6,8 +6,9 @@ const darkCodeTheme = require('prism-react-renderer/themes/dracula');
/** @type {import('@docusaurus/types').Config} */
const config = {
title: 'LiteLLM',
title: 'liteLLM',
tagline: 'Simplify LLM API Calls',
favicon: '/img/favicon.ico',
// Set the production url of your site here
url: 'https://litellm.vercel.app/',
@ -80,35 +81,27 @@ const config = {
{
title: 'Community',
items: [
{
label: 'Stack Overflow',
href: 'https://stackoverflow.com/questions/tagged/docusaurus',
},
{
label: 'Discord',
href: 'https://discordapp.com/invite/docusaurus',
href: 'https://discord.com/invite/wuPM9dRgDw',
},
{
label: 'Twitter',
href: 'https://twitter.com/docusaurus',
href: 'https://twitter.com/LiteLLM',
},
],
},
{
title: 'More',
items: [
{
label: 'Blog',
to: '/blog',
},
{
label: 'GitHub',
href: 'https://github.com/facebook/docusaurus',
href: 'https://github.com/BerriAI/litellm/',
},
],
},
],
copyright: `Copyright © ${new Date().getFullYear()} My Project, Inc. Built with Docusaurus.`,
copyright: `Copyright © ${new Date().getFullYear()} liteLLM`,
},
prism: {
theme: lightCodeTheme,

docs/my-website/index.md Normal file
View file

@ -0,0 +1,25 @@
---
slug: welcome
title: Welcome
authors: [slorber, yangshun]
tags: [facebook, hello, docusaurus]
---
[Docusaurus blogging features](https://docusaurus.io/docs/blog) are powered by the [blog plugin](https://docusaurus.io/docs/api/plugins/@docusaurus/plugin-content-blog).
Simply add Markdown files (or folders) to the `blog` directory.
Regular blog authors can be added to `authors.yml`.
The blog post date can be extracted from filenames, such as:
- `2019-05-30-welcome.md`
- `2019-05-30-welcome/index.md`
A blog post folder can be convenient to co-locate blog post images:
![Docusaurus Plushie](./docusaurus-plushie-banner.jpeg)
The blog supports tags as well!
**And if you don't want a blog**: just delete this directory, and use `blog: false` in your Docusaurus config.

View file

@ -21,14 +21,15 @@ const sidebars = {
'index',
{
type: 'category',
label: 'completion_function',
items: ['completion/input', 'completion/supported','completion/output'],
label: 'Completion()',
items: ['completion/input','completion/output'],
},
{
type: 'category',
label: 'embedding_function',
label: 'Embedding()',
items: ['embedding/supported_embedding'],
},
'completion/supported',
{
type: 'category',
label: 'Tutorials',
@ -37,6 +38,7 @@ const sidebars = {
'token_usage',
'stream',
'secret',
'caching',
{
type: 'category',
label: 'Logging & Observability',

View file

@ -1,23 +1,27 @@
# 🚅 litellm
a light 100 line package to simplify calling OpenAI, Azure, Cohere, Anthropic APIs
# *🚅 litellm*
[![PyPI Version](https://img.shields.io/pypi/v/litellm.svg)](https://pypi.org/project/litellm/)
[![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.345-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/)
[![CircleCI](https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main)
![Downloads](https://img.shields.io/pypi/dm/litellm)
[![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere%7CReplicate%7CHugging%20Face-blue?color=green)](https://github.com/BerriAI/litellm)
###### litellm manages:
* Calling all LLM APIs using the OpenAI format - `completion(model, messages)`
* Consistent output for all LLM APIs, text responses will always be available at `['choices'][0]['message']['content']`
* Consistent Exceptions for all LLM APIs, we map RateLimit, Context Window, and Authentication Error exceptions across all providers to their OpenAI equivalents. [see Code](https://github.com/BerriAI/litellm/blob/ba1079ff6698ef238c5c7f771dd2b698ec76f8d9/litellm/utils.py#L250)
[![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw)
###### observability:
* Logging - see exactly what the raw model request/response is by plugging in your own function `completion(.., logger_fn=your_logging_fn)` and/or print statements from the package `litellm.set_verbose=True`
* Callbacks - automatically send your data to Helicone, Sentry, Posthog, Slack - `litellm.success_callbacks`, `litellm.failure_callbacks` [see Callbacks](https://litellm.readthedocs.io/en/latest/advanced/)
a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints. It manages:
- translating inputs to the provider's completion and embedding endpoints
- guarantees [consistent output](https://litellm.readthedocs.io/en/latest/output/), text responses will always be available at `['choices'][0]['message']['content']`
- exception mapping - common exceptions across providers are mapped to the [OpenAI exception types](https://help.openai.com/en/articles/6897213-openai-library-error-types-guidance)
# usage
<a href='https://docs.litellm.ai/docs/completion/supported' target="_blank"><img alt='None' src='https://img.shields.io/badge/Supported_LLMs-100000?style=for-the-badge&logo=None&logoColor=000000&labelColor=000000&color=8400EA'/></a>
## Quick Start
Go directly to code: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
### Installation
Demo - https://litellm.ai/playground \
Read the docs - https://docs.litellm.ai/docs/
## quick start
```
pip install litellm
```
### Usage
```python
from litellm import completion
@ -33,11 +37,32 @@ response = completion(model="gpt-3.5-turbo", messages=messages)
# cohere call
response = completion("command-nightly", messages)
```
Need Help / Support : [see troubleshooting](https://litellm.readthedocs.io/en/latest/troubleshoot)
Code Sample: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
## Why did we build liteLLM
Stable version
```
pip install litellm==0.1.345
```
## Streaming Queries
liteLLM supports streaming the model response back; pass `stream=True` to get a streaming iterator in the response.
Streaming is supported for OpenAI, Azure, Anthropic, Huggingface models
```python
response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
for chunk in response:
print(chunk['choices'][0]['delta'])
# claude 2
result = completion('claude-2', messages, stream=True)
for chunk in result:
print(chunk['choices'][0]['delta'])
```
# support / talk with founders
- [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
# why did we build this
- **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI, Cohere
## Support
* [Meet with us 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
* Contact us at ishaan@berri.ai / krrish@berri.ai

Binary file not shown.

Image updated (3.5 KiB before, 15 KiB after).

View file

@ -1,50 +1,120 @@
import threading
success_callback = []
failure_callback = []
set_verbose=False
telemetry=True
max_tokens = 256 # OpenAI Defaults
from typing import Callable, List, Optional
input_callback: List[str] = []
success_callback: List[str] = []
failure_callback: List[str] = []
set_verbose = False
telemetry = True
max_tokens = 256 # OpenAI Defaults
retry = True
api_key = None
openai_key = None
azure_key = None
anthropic_key = None
replicate_key = None
cohere_key = None
openrouter_key = None
huggingface_key = None
vertex_project = None
vertex_location = None
hugging_api_token = None
api_key: Optional[str] = None
openai_key: Optional[str] = None
azure_key: Optional[str] = None
anthropic_key: Optional[str] = None
replicate_key: Optional[str] = None
cohere_key: Optional[str] = None
openrouter_key: Optional[str] = None
huggingface_key: Optional[str] = None
vertex_project: Optional[str] = None
vertex_location: Optional[str] = None
hugging_api_token: Optional[str] = None
togetherai_api_key: Optional[str] = None
caching = False
caching_with_models = False # if you want the caching key to be model + prompt
model_cost = {
"gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
"gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name
"gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-32k": {"max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
"claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
"claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
"text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
"chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
"command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
"gpt-3.5-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-35-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
}, # azure model name
"gpt-3.5-turbo-0613": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-35-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
}, # azure model name
"gpt-3.5-turbo-16k-0613": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-4": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-0613": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-32k": {
"max_tokens": 8000,
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012,
},
"claude-instant-1": {
"max_tokens": 100000,
"input_cost_per_token": 0.00000163,
"output_cost_per_token": 0.00000551,
},
"claude-2": {
"max_tokens": 100000,
"input_cost_per_token": 0.00001102,
"output_cost_per_token": 0.00003268,
},
"text-bison-001": {
"max_tokens": 8192,
"input_cost_per_token": 0.000004,
"output_cost_per_token": 0.000004,
},
"chat-bison-001": {
"max_tokens": 4096,
"input_cost_per_token": 0.000002,
"output_cost_per_token": 0.000002,
},
"command-nightly": {
"max_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015,
},
}
####### THREAD-SPECIFIC DATA ###################
class MyLocal(threading.local):
def __init__(self):
self.user = "Hello World"
_thread_context = MyLocal()
def identify(event_details):
# Store user in thread local data
if "user" in event_details:
_thread_context.user = event_details["user"]
####### ADDITIONAL PARAMS ################### configurable params if you use proxy models like Helicone, map spend to org id, etc.
api_base = None
headers = None
@ -55,60 +125,48 @@ config_path = None
secret_manager_client = None
####### COMPLETION MODELS ###################
open_ai_chat_completion_models = [
"gpt-4",
"gpt-4-0613",
"gpt-4-32k",
"gpt-4-32k-0613",
#################
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k-0613",
]
open_ai_text_completion_models = [
'text-davinci-003'
"gpt-4",
"gpt-4-0613",
"gpt-4-32k",
"gpt-4-32k-0613",
#################
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k-0613",
]
open_ai_text_completion_models = ["text-davinci-003"]
cohere_models = [
'command-nightly',
"command",
"command-light",
"command-medium-beta",
"command-xlarge-beta"
"command-nightly",
"command",
"command-light",
"command-medium-beta",
"command-xlarge-beta",
]
anthropic_models = [
"claude-2",
"claude-instant-1",
"claude-instant-1.2"
]
anthropic_models = ["claude-2", "claude-instant-1", "claude-instant-1.2"]
replicate_models = [
"replicate/"
] # placeholder, to make sure we accept any replicate model in our model_list
] # placeholder, to make sure we accept any replicate model in our model_list
openrouter_models = [
'google/palm-2-codechat-bison',
'google/palm-2-chat-bison',
'openai/gpt-3.5-turbo',
'openai/gpt-3.5-turbo-16k',
'openai/gpt-4-32k',
'anthropic/claude-2',
'anthropic/claude-instant-v1',
'meta-llama/llama-2-13b-chat',
'meta-llama/llama-2-70b-chat'
"google/palm-2-codechat-bison",
"google/palm-2-chat-bison",
"openai/gpt-3.5-turbo",
"openai/gpt-3.5-turbo-16k",
"openai/gpt-4-32k",
"anthropic/claude-2",
"anthropic/claude-instant-v1",
"meta-llama/llama-2-13b-chat",
"meta-llama/llama-2-70b-chat",
]
vertex_chat_models = [
"chat-bison",
"chat-bison@001"
]
vertex_chat_models = ["chat-bison", "chat-bison@001"]
vertex_text_models = [
"text-bison",
"text-bison@001"
]
vertex_text_models = ["text-bison", "text-bison@001"]
huggingface_models = [
"meta-llama/Llama-2-7b-hf",
@ -123,24 +181,56 @@ huggingface_models = [
"meta-llama/Llama-2-13b-chat",
"meta-llama/Llama-2-70b",
"meta-llama/Llama-2-70b-chat",
] # these have been tested on extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/completion/supported
] # these have been tested on extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/completion/supported
ai21_models = [
"j2-ultra",
"j2-mid",
"j2-light"
ai21_models = ["j2-ultra", "j2-mid", "j2-light"]
model_list = (
open_ai_chat_completion_models
+ open_ai_text_completion_models
+ cohere_models
+ anthropic_models
+ replicate_models
+ openrouter_models
+ huggingface_models
+ vertex_chat_models
+ vertex_text_models
+ ai21_models
)
provider_list = [
"openai",
"cohere",
"anthropic",
"replicate",
"huggingface",
"together_ai",
"openrouter",
"vertex_ai",
"ai21",
]
model_list = open_ai_chat_completion_models + open_ai_text_completion_models + cohere_models + anthropic_models + replicate_models + openrouter_models + huggingface_models + vertex_chat_models + vertex_text_models + ai21_models
####### EMBEDDING MODELS ###################
open_ai_embedding_models = [
'text-embedding-ada-002'
]
open_ai_embedding_models = ["text-embedding-ada-002"]
from .timeout import timeout
from .utils import client, logging, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost, load_test_model, get_litellm_params
from .main import * # Import all the symbols from main.py
from .testing import *
from .utils import (
client,
exception_type,
get_optional_params,
modify_integration,
token_counter,
cost_per_token,
completion_cost,
get_litellm_params,
Logging
)
from .main import * # type: ignore
from .integrations import *
from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
from openai.error import (
AuthenticationError,
InvalidRequestError,
RateLimitError,
ServiceUnavailableError,
OpenAIError,
)

litellm/exceptions.py Normal file
View file

@ -0,0 +1,62 @@
## LiteLLM versions of the OpenAI Exception Types
from openai.error import (
AuthenticationError,
InvalidRequestError,
RateLimitError,
ServiceUnavailableError,
OpenAIError,
)
class AuthenticationError(AuthenticationError): # type: ignore
def __init__(self, message, llm_provider):
self.status_code = 401
self.message = message
self.llm_provider = llm_provider
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
class InvalidRequestError(InvalidRequestError): # type: ignore
def __init__(self, message, model, llm_provider):
self.status_code = 400
self.message = message
self.model = model
self.llm_provider = llm_provider
super().__init__(
self.message, f"{self.model}"
) # Call the base class constructor with the parameters it needs
class RateLimitError(RateLimitError): # type: ignore
def __init__(self, message, llm_provider):
self.status_code = 429
self.message = message
self.llm_provider = llm_provider
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
class ServiceUnavailableError(ServiceUnavailableError): # type: ignore
def __init__(self, message, llm_provider):
self.status_code = 500
self.message = message
self.llm_provider = llm_provider
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
class OpenAIError(OpenAIError): # type: ignore
def __init__(self, original_exception):
self.status_code = original_exception.http_status
super().__init__(
http_body=original_exception.http_body,
http_status=original_exception.http_status,
json_body=original_exception.json_body,
headers=original_exception.headers,
code=original_exception.code,
)
self.llm_provider = "openai"
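A minimal sketch of how these mapped exceptions could be caught by a caller, assuming they are importable from `litellm.exceptions` as defined above and that a provider key such as `OPENAI_API_KEY` is set:
```python
from litellm import completion
from litellm.exceptions import AuthenticationError, RateLimitError

messages = [{"content": "Hello, how are you?", "role": "user"}]

try:
    response = completion(model="gpt-3.5-turbo", messages=messages)
except AuthenticationError as e:
    # provider 401 errors are mapped to this type
    print(f"auth failed for {e.llm_provider}: {e.message}")
except RateLimitError as e:
    # provider 429 errors are mapped to this type
    print(f"rate limited by {e.llm_provider}: {e.message}")
```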

View file

@ -1 +1 @@
from . import *
from . import *

View file

@ -1,53 +1,121 @@
#### What this does ####
# On success + failure, log events to aispend.io
# On success + failure, log events to aispend.io
import dotenv, os
import requests
dotenv.load_dotenv() # Loading env variables using dotenv
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback
import datetime
model_cost = {
"gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
"gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name
"gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-32k": {"max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
"claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
"claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
"text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
"chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
"command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
"gpt-3.5-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-35-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
}, # azure model name
"gpt-3.5-turbo-0613": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-35-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
}, # azure model name
"gpt-3.5-turbo-16k-0613": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-4": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-0613": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-32k": {
"max_tokens": 8000,
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012,
},
"claude-instant-1": {
"max_tokens": 100000,
"input_cost_per_token": 0.00000163,
"output_cost_per_token": 0.00000551,
},
"claude-2": {
"max_tokens": 100000,
"input_cost_per_token": 0.00001102,
"output_cost_per_token": 0.00003268,
},
"text-bison-001": {
"max_tokens": 8192,
"input_cost_per_token": 0.000004,
"output_cost_per_token": 0.000004,
},
"chat-bison-001": {
"max_tokens": 4096,
"input_cost_per_token": 0.000002,
"output_cost_per_token": 0.000002,
},
"command-nightly": {
"max_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015,
},
}
class AISpendLogger:
# Class variables or attributes
def __init__(self):
# Instance variables
self.account_id = os.getenv("AISPEND_ACCOUNT_ID")
self.api_key = os.getenv("AISPEND_API_KEY")
def price_calculator(self, model, response_obj, start_time, end_time):
# try and find if the model is in the model_cost map
# else default to the average of the costs
prompt_tokens_cost_usd_dollar = 0
completion_tokens_cost_usd_dollar = 0
if model in model_cost:
prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
elif "replicate" in model:
prompt_tokens_cost_usd_dollar = (
model_cost[model]["input_cost_per_token"]
* response_obj["usage"]["prompt_tokens"]
)
completion_tokens_cost_usd_dollar = (
model_cost[model]["output_cost_per_token"]
* response_obj["usage"]["completion_tokens"]
)
elif "replicate" in model:
# replicate models are charged based on time
# llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
model_run_time = end_time - start_time # assuming time in seconds
model_run_time = end_time - start_time # assuming time in seconds
cost_usd_dollar = model_run_time * 0.0032
prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
else:
# calculate average input cost
# calculate average input cost
input_cost_sum = 0
output_cost_sum = 0
for model in model_cost:
@ -55,37 +123,52 @@ class AISpendLogger:
output_cost_sum += model_cost[model]["output_cost_per_token"]
avg_input_cost = input_cost_sum / len(model_cost.keys())
avg_output_cost = output_cost_sum / len(model_cost.keys())
prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
prompt_tokens_cost_usd_dollar = (
model_cost[model]["input_cost_per_token"]
* response_obj["usage"]["prompt_tokens"]
)
completion_tokens_cost_usd_dollar = (
model_cost[model]["output_cost_per_token"]
* response_obj["usage"]["completion_tokens"]
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
def log_event(self, model, response_obj, start_time, end_time, print_verbose):
# Method definition
try:
print_verbose(f"AISpend Logging - Enters logging function for model {model}")
print_verbose(
f"AISpend Logging - Enters logging function for model {model}"
)
url = f"https://aispend.io/api/v1/accounts/{self.account_id}/data"
headers = {
'Authorization': f'Bearer {self.api_key}',
'Content-Type': 'application/json'
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}
response_timestamp = datetime.datetime.fromtimestamp(int(response_obj["created"])).strftime('%Y-%m-%d')
response_timestamp = datetime.datetime.fromtimestamp(
int(response_obj["created"])
).strftime("%Y-%m-%d")
prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
(
prompt_tokens_cost_usd_dollar,
completion_tokens_cost_usd_dollar,
) = self.price_calculator(model, response_obj, start_time, end_time)
prompt_tokens_cost_usd_cent = prompt_tokens_cost_usd_dollar * 100
completion_tokens_cost_usd_cent = completion_tokens_cost_usd_dollar * 100
data = [{
"requests": 1,
"requests_context": 1,
"context_tokens": response_obj["usage"]["prompt_tokens"],
"requests_generated": 1,
"generated_tokens": response_obj["usage"]["completion_tokens"],
"recorded_date": response_timestamp,
"model_id": response_obj["model"],
"generated_tokens_cost_usd_cent": prompt_tokens_cost_usd_cent,
"context_tokens_cost_usd_cent": completion_tokens_cost_usd_cent
}]
data = [
{
"requests": 1,
"requests_context": 1,
"context_tokens": response_obj["usage"]["prompt_tokens"],
"requests_generated": 1,
"generated_tokens": response_obj["usage"]["completion_tokens"],
"recorded_date": response_timestamp,
"model_id": response_obj["model"],
"generated_tokens_cost_usd_cent": prompt_tokens_cost_usd_cent,
"context_tokens_cost_usd_cent": completion_tokens_cost_usd_cent,
}
]
print_verbose(f"AISpend Logging - final data object: {data}")
except:

View file

@ -1,52 +1,120 @@
#### What this does ####
# On success + failure, log events to aispend.io
# On success + failure, log events to aispend.io
import dotenv, os
import requests
dotenv.load_dotenv() # Loading env variables using dotenv
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback
import datetime
model_cost = {
"gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
"gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name
"gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-32k": {"max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
"claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
"claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
"text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
"chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
"command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
"gpt-3.5-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-35-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
}, # azure model name
"gpt-3.5-turbo-0613": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-35-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
}, # azure model name
"gpt-3.5-turbo-16k-0613": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-4": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-0613": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-32k": {
"max_tokens": 8000,
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012,
},
"claude-instant-1": {
"max_tokens": 100000,
"input_cost_per_token": 0.00000163,
"output_cost_per_token": 0.00000551,
},
"claude-2": {
"max_tokens": 100000,
"input_cost_per_token": 0.00001102,
"output_cost_per_token": 0.00003268,
},
"text-bison-001": {
"max_tokens": 8192,
"input_cost_per_token": 0.000004,
"output_cost_per_token": 0.000004,
},
"chat-bison-001": {
"max_tokens": 4096,
"input_cost_per_token": 0.000002,
"output_cost_per_token": 0.000002,
},
"command-nightly": {
"max_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015,
},
}
class BerriSpendLogger:
# Class variables or attributes
def __init__(self):
# Instance variables
self.account_id = os.getenv("BERRISPEND_ACCOUNT_ID")
def price_calculator(self, model, response_obj, start_time, end_time):
# try and find if the model is in the model_cost map
# else default to the average of the costs
prompt_tokens_cost_usd_dollar = 0
completion_tokens_cost_usd_dollar = 0
if model in model_cost:
prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
elif "replicate" in model:
prompt_tokens_cost_usd_dollar = (
model_cost[model]["input_cost_per_token"]
* response_obj["usage"]["prompt_tokens"]
)
completion_tokens_cost_usd_dollar = (
model_cost[model]["output_cost_per_token"]
* response_obj["usage"]["completion_tokens"]
)
elif "replicate" in model:
# replicate models are charged based on time
# llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
model_run_time = end_time - start_time # assuming time in seconds
model_run_time = end_time - start_time # assuming time in seconds
cost_usd_dollar = model_run_time * 0.0032
prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
else:
# calculate average input cost
# calculate average input cost
input_cost_sum = 0
output_cost_sum = 0
for model in model_cost:
@ -54,42 +122,59 @@ class BerriSpendLogger:
output_cost_sum += model_cost[model]["output_cost_per_token"]
avg_input_cost = input_cost_sum / len(model_cost.keys())
avg_output_cost = output_cost_sum / len(model_cost.keys())
prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
prompt_tokens_cost_usd_dollar = (
model_cost[model]["input_cost_per_token"]
* response_obj["usage"]["prompt_tokens"]
)
completion_tokens_cost_usd_dollar = (
model_cost[model]["output_cost_per_token"]
* response_obj["usage"]["completion_tokens"]
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
def log_event(self, model, messages, response_obj, start_time, end_time, print_verbose):
def log_event(
self, model, messages, response_obj, start_time, end_time, print_verbose
):
# Method definition
try:
print_verbose(f"BerriSpend Logging - Enters logging function for model {model}")
print_verbose(
f"BerriSpend Logging - Enters logging function for model {model}"
)
url = f"https://berrispend.berri.ai/spend"
headers = {
'Content-Type': 'application/json'
}
headers = {"Content-Type": "application/json"}
prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
total_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
(
prompt_tokens_cost_usd_dollar,
completion_tokens_cost_usd_dollar,
) = self.price_calculator(model, response_obj, start_time, end_time)
total_cost = (
prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
)
response_time = (end_time-start_time).total_seconds()
response_time = (end_time - start_time).total_seconds()
if "response" in response_obj:
data = [{
"response_time": response_time,
"model_id": response_obj["model"],
"total_cost": total_cost,
"messages": messages,
"response": response_obj['choices'][0]['message']['content'],
"account_id": self.account_id
}]
data = [
{
"response_time": response_time,
"model_id": response_obj["model"],
"total_cost": total_cost,
"messages": messages,
"response": response_obj["choices"][0]["message"]["content"],
"account_id": self.account_id,
}
]
elif "error" in response_obj:
data = [{
"response_time": response_time,
"model_id": response_obj["model"],
"total_cost": total_cost,
"messages": messages,
"error": response_obj['error'],
"account_id": self.account_id
}]
data = [
{
"response_time": response_time,
"model_id": response_obj["model"],
"total_cost": total_cost,
"messages": messages,
"error": response_obj["error"],
"account_id": self.account_id,
}
]
print_verbose(f"BerriSpend Logging - final data object: {data}")
response = requests.post(url, headers=headers, json=data)
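Editor's note: a minimal, self-contained sketch of the per-token pricing math that price_calculator applies above when the model is found in model_cost. The rates mirror entries from the model_cost map shown later in this diff; the usage numbers are hypothetical.

# Illustrative per-token rates, same shape as the model_cost map in this diff.
model_cost = {
    "gpt-3.5-turbo": {"input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
}
usage = {"prompt_tokens": 9, "completion_tokens": 17}  # hypothetical usage block from a response

prompt_cost = model_cost["gpt-3.5-turbo"]["input_cost_per_token"] * usage["prompt_tokens"]
completion_cost = model_cost["gpt-3.5-turbo"]["output_cost_per_token"] * usage["completion_tokens"]
print(prompt_cost + completion_cost)  # roughly 4.75e-05 USD for this call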

View file

@@ -2,19 +2,24 @@
# On success, logs events to Helicone
import dotenv, os
import requests
dotenv.load_dotenv() # Loading env variables using dotenv
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback
class HeliconeLogger:
# Class variables or attributes
helicone_model_list = ["gpt", "claude"]
def __init__(self):
# Instance variables
self.provider_url = "https://api.openai.com/v1"
self.key = os.getenv('HELICONE_API_KEY')
self.key = os.getenv("HELICONE_API_KEY")
def claude_mapping(self, model, messages, response_obj):
from anthropic import HUMAN_PROMPT, AI_PROMPT
prompt = f"{HUMAN_PROMPT}"
prompt = f"{HUMAN_PROMPT}"
for message in messages:
if "role" in message:
if message["role"] == "user":
@@ -26,48 +31,84 @@ class HeliconeLogger:
prompt += f"{AI_PROMPT}"
claude_provider_request = {"model": model, "prompt": prompt}
claude_response_obj = {"completion": response_obj['choices'][0]['message']['content'], "model": model, "stop_reason": "stop_sequence"}
claude_response_obj = {
"completion": response_obj["choices"][0]["message"]["content"],
"model": model,
"stop_reason": "stop_sequence",
}
return claude_provider_request, claude_response_obj
def log_success(self, model, messages, response_obj, start_time, end_time, print_verbose):
def log_success(
self, model, messages, response_obj, start_time, end_time, print_verbose
):
# Method definition
try:
print_verbose(f"Helicone Logging - Enters logging function for model {model}")
model = model if any(accepted_model in model for accepted_model in self.helicone_model_list) else "gpt-3.5-turbo"
print_verbose(
f"Helicone Logging - Enters logging function for model {model}"
)
model = (
model
if any(
accepted_model in model
for accepted_model in self.helicone_model_list
)
else "gpt-3.5-turbo"
)
provider_request = {"model": model, "messages": messages}
if "claude" in model:
provider_request, response_obj = self.claude_mapping(model=model, messages=messages, response_obj=response_obj)
if "claude" in model:
provider_request, response_obj = self.claude_mapping(
model=model, messages=messages, response_obj=response_obj
)
providerResponse = {
"json": response_obj,
"headers": {"openai-version": "2020-10-01"},
"status": 200
"json": response_obj,
"headers": {"openai-version": "2020-10-01"},
"status": 200,
}
# Code to be executed
url = "https://api.hconeai.com/oai/v1/log"
headers = {
'Authorization': f'Bearer {self.key}',
'Content-Type': 'application/json'
"Authorization": f"Bearer {self.key}",
"Content-Type": "application/json",
}
start_time_seconds = int(start_time.timestamp())
start_time_milliseconds = int((start_time.timestamp() - start_time_seconds) * 1000)
start_time_milliseconds = int(
(start_time.timestamp() - start_time_seconds) * 1000
)
end_time_seconds = int(end_time.timestamp())
end_time_milliseconds = int((end_time.timestamp() - end_time_seconds) * 1000)
end_time_milliseconds = int(
(end_time.timestamp() - end_time_seconds) * 1000
)
data = {
"providerRequest": {"url": self.provider_url, "json": provider_request, "meta": {"Helicone-Auth": f"Bearer {self.key}"}},
"providerRequest": {
"url": self.provider_url,
"json": provider_request,
"meta": {"Helicone-Auth": f"Bearer {self.key}"},
},
"providerResponse": providerResponse,
"timing": {"startTime": {"seconds": start_time_seconds, "milliseconds": start_time_milliseconds}, "endTime": {"seconds": end_time_seconds, "milliseconds": end_time_milliseconds}} # {"seconds": .., "milliseconds": ..}
"timing": {
"startTime": {
"seconds": start_time_seconds,
"milliseconds": start_time_milliseconds,
},
"endTime": {
"seconds": end_time_seconds,
"milliseconds": end_time_milliseconds,
},
}, # {"seconds": .., "milliseconds": ..}
}
response = requests.post(url, headers=headers, json=data)
if response.status_code == 200:
print_verbose("Helicone Logging - Success!")
else:
print_verbose(f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}")
print_verbose(
f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}"
)
print_verbose(f"Helicone Logging - Error {response.text}")
except:
# traceback.print_exc()
print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")
pass
pass
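Editor's note: the Helicone timing payload above splits each timestamp into whole seconds plus a millisecond remainder. A small hedged sketch of that conversion, assuming Python datetime objects; the timestamps are made up.

import datetime

start_time = datetime.datetime(2023, 8, 21, 12, 0, 0, 250000)  # hypothetical call start
end_time = datetime.datetime(2023, 8, 21, 12, 0, 2, 750000)    # hypothetical call end

start_time_seconds = int(start_time.timestamp())
start_time_milliseconds = int((start_time.timestamp() - start_time_seconds) * 1000)  # 250
end_time_seconds = int(end_time.timestamp())
end_time_milliseconds = int((end_time.timestamp() - end_time_seconds) * 1000)        # 750

timing = {
    "startTime": {"seconds": start_time_seconds, "milliseconds": start_time_milliseconds},
    "endTime": {"seconds": end_time_seconds, "milliseconds": end_time_milliseconds},
}
print(timing)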

View file

@@ -0,0 +1,74 @@
import requests, traceback, json
class LiteDebugger:
def __init__(self):
self.api_url = "https://api.litellm.ai/debugger"
pass
def input_log_event(self, model, messages, end_user, litellm_call_id, print_verbose):
try:
print_verbose(
f"LiteLLMDebugger: Logging - Enters input logging function for model {model}"
)
litellm_data_obj = {
"model": model,
"messages": messages,
"end_user": end_user,
"status": "initiated",
"litellm_call_id": litellm_call_id
}
response = requests.post(url=self.api_url, headers={"content-type": "application/json"}, data=json.dumps(litellm_data_obj))
print_verbose(f"LiteDebugger: api response - {response.text}")
except:
print_verbose(f"LiteDebugger: Logging Error - {traceback.format_exc()}")
pass
def log_event(self, model,
messages,
end_user,
response_obj,
start_time,
end_time,
litellm_call_id,
print_verbose,):
try:
print_verbose(
f"LiteLLMDebugger: Logging - Enters input logging function for model {model}"
)
total_cost = 0 # [TODO] implement cost tracking
response_time = (end_time - start_time).total_seconds()
if "choices" in response_obj:
litellm_data_obj = {
"response_time": response_time,
"model": response_obj["model"],
"total_cost": total_cost,
"messages": messages,
"response": response_obj["choices"][0]["message"]["content"],
"end_user": end_user,
"litellm_call_id": litellm_call_id,
"status": "success"
}
print_verbose(
f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
)
response = requests.post(url=self.api_url, headers={"content-type": "application/json"}, data=json.dumps(litellm_data_obj))
elif "error" in response_obj:
if "Unable to map your input to a model." in response_obj["error"]:
total_cost = 0
litellm_data_obj = {
"response_time": response_time,
"model": response_obj["model"],
"total_cost": total_cost,
"messages": messages,
"error": response_obj["error"],
"end_user": end_user,
"litellm_call_id": litellm_call_id,
"status": "failure"
}
print_verbose(
f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
)
response = requests.post(url=self.api_url, headers={"content-type": "application/json"}, data=json.dumps(litellm_data_obj))
print_verbose(f"LiteDebugger: api response - {response.text}")
except:
print_verbose(f"LiteDebugger: Logging Error - {traceback.format_exc()}")
pass
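Editor's note: a hedged illustration of the "initiated" record that LiteDebugger.input_log_event above serializes and posts; every value here is hypothetical, and the shared litellm_call_id is what lets the later success/failure record be matched to it.

import json, uuid

litellm_data_obj = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hello, how are you?"}],
    "end_user": "example-user",            # hypothetical end-user identifier
    "status": "initiated",
    "litellm_call_id": str(uuid.uuid4()),  # reused by the follow-up log_event call
}
print(json.dumps(litellm_data_obj, indent=2))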

View file

@@ -3,31 +3,94 @@
import dotenv, os
import requests
dotenv.load_dotenv() # Loading env variables using dotenv
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback
import datetime, subprocess, sys
model_cost = {
"gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
"gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name
"gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-32k": {"max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
"claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
"claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
"text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
"chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
"command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
"gpt-3.5-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-35-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
}, # azure model name
"gpt-3.5-turbo-0613": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-35-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
}, # azure model name
"gpt-3.5-turbo-16k-0613": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-4": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-0613": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-32k": {
"max_tokens": 8000,
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012,
},
"claude-instant-1": {
"max_tokens": 100000,
"input_cost_per_token": 0.00000163,
"output_cost_per_token": 0.00000551,
},
"claude-2": {
"max_tokens": 100000,
"input_cost_per_token": 0.00001102,
"output_cost_per_token": 0.00003268,
},
"text-bison-001": {
"max_tokens": 8192,
"input_cost_per_token": 0.000004,
"output_cost_per_token": 0.000004,
},
"chat-bison-001": {
"max_tokens": 4096,
"input_cost_per_token": 0.000002,
"output_cost_per_token": 0.000002,
},
"command-nightly": {
"max_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015,
},
}
class Supabase:
# Class variables or attributes
supabase_table_name = "request_logs"
def __init__(self):
# Instance variables
self.supabase_url = os.getenv("SUPABASE_URL")
@@ -35,9 +98,11 @@ class Supabase:
try:
import supabase
except ImportError:
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'supabase'])
subprocess.check_call([sys.executable, "-m", "pip", "install", "supabase"])
import supabase
self.supabase_client = supabase.create_client(self.supabase_url, self.supabase_key)
self.supabase_client = supabase.create_client(
self.supabase_url, self.supabase_key
)
def price_calculator(self, model, response_obj, start_time, end_time):
# try and find if the model is in the model_cost map
@@ -45,17 +110,23 @@ class Supabase:
prompt_tokens_cost_usd_dollar = 0
completion_tokens_cost_usd_dollar = 0
if model in model_cost:
prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
elif "replicate" in model:
prompt_tokens_cost_usd_dollar = (
model_cost[model]["input_cost_per_token"]
* response_obj["usage"]["prompt_tokens"]
)
completion_tokens_cost_usd_dollar = (
model_cost[model]["output_cost_per_token"]
* response_obj["usage"]["completion_tokens"]
)
elif "replicate" in model:
# replicate models are charged based on time
# llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
model_run_time = end_time - start_time # assuming time in seconds
model_run_time = end_time - start_time # assuming time in seconds
cost_usd_dollar = model_run_time * 0.0032
prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
else:
# calculate average input cost
# calculate average input cost
input_cost_sum = 0
output_cost_sum = 0
for model in model_cost:
@@ -63,41 +134,104 @@ class Supabase:
output_cost_sum += model_cost[model]["output_cost_per_token"]
avg_input_cost = input_cost_sum / len(model_cost.keys())
avg_output_cost = output_cost_sum / len(model_cost.keys())
prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
prompt_tokens_cost_usd_dollar = (
model_cost[model]["input_cost_per_token"]
* response_obj["usage"]["prompt_tokens"]
)
completion_tokens_cost_usd_dollar = (
model_cost[model]["output_cost_per_token"]
* response_obj["usage"]["completion_tokens"]
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
def log_event(self, model, messages, end_user, response_obj, start_time, end_time, print_verbose):
def input_log_event(self, model, messages, end_user, litellm_call_id, print_verbose):
try:
print_verbose(f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}")
print_verbose(
f"Supabase Logging - Enters input logging function for model {model}"
)
supabase_data_obj = {
"model": model,
"messages": messages,
"end_user": end_user,
"status": "initiated",
"litellm_call_id": litellm_call_id
}
data, count = (
self.supabase_client.table(self.supabase_table_name)
.insert(supabase_data_obj)
.execute()
)
print(f"data: {data}")
except:
print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
pass
prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
total_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
def log_event(
self,
model,
messages,
end_user,
response_obj,
start_time,
end_time,
litellm_call_id,
print_verbose,
):
try:
print_verbose(
f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}"
)
response_time = (end_time-start_time).total_seconds()
(
prompt_tokens_cost_usd_dollar,
completion_tokens_cost_usd_dollar,
) = self.price_calculator(model, response_obj, start_time, end_time)
total_cost = (
prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
)
response_time = (end_time - start_time).total_seconds()
if "choices" in response_obj:
supabase_data_obj = {
"response_time": response_time,
"model": response_obj["model"],
"total_cost": total_cost,
"total_cost": total_cost,
"messages": messages,
"response": response_obj['choices'][0]['message']['content'],
"end_user": end_user
"response": response_obj["choices"][0]["message"]["content"],
"end_user": end_user,
"litellm_call_id": litellm_call_id,
"status": "success"
}
print_verbose(f"Supabase Logging - final data object: {supabase_data_obj}")
data, count = self.supabase_client.table(self.supabase_table_name).insert(supabase_data_obj).execute()
print_verbose(
f"Supabase Logging - final data object: {supabase_data_obj}"
)
data, count = (
self.supabase_client.table(self.supabase_table_name)
.upsert(supabase_data_obj)
.execute()
)
elif "error" in response_obj:
if "Unable to map your input to a model." in response_obj["error"]:
total_cost = 0
supabase_data_obj = {
"response_time": response_time,
"model": response_obj["model"],
"total_cost": total_cost,
"total_cost": total_cost,
"messages": messages,
"error": response_obj['error'],
"end_user": end_user
"error": response_obj["error"],
"end_user": end_user,
"litellm_call_id": litellm_call_id,
"status": "failure"
}
print_verbose(f"Supabase Logging - final data object: {supabase_data_obj}")
data, count = self.supabase_client.table(self.supabase_table_name).insert(supabase_data_obj).execute()
print_verbose(
f"Supabase Logging - final data object: {supabase_data_obj}"
)
data, count = (
self.supabase_client.table(self.supabase_table_name)
.upsert(supabase_data_obj)
.execute()
)
except:
# traceback.print_exc()
print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")

View file

@@ -1 +1 @@
from . import *
from . import *

View file

@@ -1,59 +1,78 @@
import os, json
from enum import Enum
import requests
from litellm import logging
import time
import time
from typing import Callable
from litellm.utils import ModelResponse
class AnthropicConstants(Enum):
HUMAN_PROMPT = "\n\nHuman:"
AI_PROMPT = "\n\nAssistant:"
class AnthropicError(Exception):
def __init__(self, status_code, message):
self.status_code = status_code
self.message = message
super().__init__(self.message) # Call the base class constructor with the parameters it needs
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
class AnthropicLLM:
def __init__(self, encoding, default_max_tokens_to_sample, api_key=None):
class AnthropicLLM:
def __init__(self, encoding, default_max_tokens_to_sample, logging_obj, api_key=None):
self.encoding = encoding
self.default_max_tokens_to_sample = default_max_tokens_to_sample
self.completion_url = "https://api.anthropic.com/v1/complete"
self.api_key = api_key
self.logging_obj = logging_obj
self.validate_environment(api_key=api_key)
def validate_environment(self, api_key): # set up the environment required to run the model
# set the api key
try:
self.api_key = os.getenv("ANTHROPIC_API_KEY") if "ANTHROPIC_API_KEY" in os.environ else api_key
if self.api_key == None:
raise Exception
self.headers = {
"accept": "application/json",
"anthropic-version": "2023-06-01",
"content-type": "application/json",
"x-api-key": self.api_key
}
except:
raise ValueError("Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params")
pass
def validate_environment(
self, api_key
): # set up the environment required to run the model
# set the api key
if self.api_key == None:
raise ValueError(
"Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
)
self.api_key = api_key
self.headers = {
"accept": "application/json",
"anthropic-version": "2023-06-01",
"content-type": "application/json",
"x-api-key": self.api_key,
}
def completion(self, model: str, messages: list, model_response: dict, print_verbose: Callable, optional_params=None, litellm_params=None, logger_fn=None): # logic for parsing in - calling - parsing out model completion calls
def completion(
self,
model: str,
messages: list,
model_response: ModelResponse,
print_verbose: Callable,
optional_params=None,
litellm_params=None,
logger_fn=None,
): # logic for parsing in - calling - parsing out model completion calls
model = model
prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}"
for message in messages:
if "role" in message:
if message["role"] == "user":
prompt += f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
prompt += (
f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
)
else:
prompt += f"{AnthropicConstants.AI_PROMPT.value}{message['content']}"
prompt += (
f"{AnthropicConstants.AI_PROMPT.value}{message['content']}"
)
else:
prompt += f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
prompt += f"{AnthropicConstants.AI_PROMPT.value}"
if "max_tokens" in optional_params and optional_params["max_tokens"] != float('inf'):
if "max_tokens" in optional_params and optional_params["max_tokens"] != float(
"inf"
):
max_tokens = optional_params["max_tokens"]
else:
max_tokens = self.default_max_tokens_to_sample
@@ -61,39 +80,51 @@ class AnthropicLLM:
"model": model,
"prompt": prompt,
"max_tokens_to_sample": max_tokens,
**optional_params
**optional_params,
}
## LOGGING
logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params}, logger_fn=logger_fn)
self.logging_obj.pre_call(input=prompt, api_key=self.api_key, additional_args={"complete_input_dict": data})
## COMPLETION CALL
response = requests.post(self.completion_url, headers=self.headers, data=json.dumps(data))
response = requests.post(
self.completion_url, headers=self.headers, data=json.dumps(data)
)
if "stream" in optional_params and optional_params["stream"] == True:
return response.iter_lines()
else:
## LOGGING
logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params, "original_response": response.text}, logger_fn=logger_fn)
self.logging_obj.post_call(input=prompt, api_key=self.api_key, original_response=response.text, additional_args={"complete_input_dict": data})
print_verbose(f"raw model_response: {response.text}")
## RESPONSE OBJECT
completion_response = response.json()
if "error" in completion_response:
raise AnthropicError(message=completion_response["error"], status_code=response.status_code)
raise AnthropicError(
message=completion_response["error"],
status_code=response.status_code,
)
else:
model_response["choices"][0]["message"]["content"] = completion_response["completion"]
model_response["choices"][0]["message"][
"content"
] = completion_response["completion"]
## CALCULATING USAGE
prompt_tokens = len(self.encoding.encode(prompt)) ##[TODO] use the anthropic tokenizer here
completion_tokens = len(self.encoding.encode(model_response["choices"][0]["message"]["content"])) ##[TODO] use the anthropic tokenizer here
prompt_tokens = len(
self.encoding.encode(prompt)
) ##[TODO] use the anthropic tokenizer here
completion_tokens = len(
self.encoding.encode(model_response["choices"][0]["message"]["content"])
) ##[TODO] use the anthropic tokenizer here
model_response["created"] = time.time()
model_response["model"] = model
model_response["usage"] = {
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
"total_tokens": prompt_tokens + completion_tokens
}
"total_tokens": prompt_tokens + completion_tokens,
}
return model_response
def embedding(): # logic for parsing in - calling - parsing out model embedding calls
pass
def embedding(
self,
): # logic for parsing in - calling - parsing out model embedding calls
pass
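Editor's note: a small standalone sketch of the Human/Assistant prompt string that AnthropicLLM.completion builds above, with the constants inlined and a hypothetical conversation.

HUMAN_PROMPT = "\n\nHuman:"      # mirrors AnthropicConstants.HUMAN_PROMPT
AI_PROMPT = "\n\nAssistant:"     # mirrors AnthropicConstants.AI_PROMPT

messages = [
    {"role": "user", "content": "Hello, how are you?"},
    {"role": "assistant", "content": "Doing well, thanks!"},
    {"role": "user", "content": "Great, tell me a joke."},
]

prompt = HUMAN_PROMPT
for message in messages:
    if message.get("role") == "user":
        prompt += f"{HUMAN_PROMPT}{message['content']}"
    else:
        prompt += f"{AI_PROMPT}{message['content']}"
prompt += AI_PROMPT  # the completion is sampled after the final Assistant marker
print(repr(prompt))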

View file

@@ -1,11 +1,16 @@
## This is a template base class to be used for adding new LLM providers via API calls
class BaseLLM():
def validate_environment(): # set up the environment required to run the model
pass
def completion(): # logic for parsing in - calling - parsing out model completion calls
class BaseLLM:
def validate_environment(self): # set up the environment required to run the model
pass
def embedding(): # logic for parsing in - calling - parsing out model embedding calls
pass
def completion(
self,
): # logic for parsing in - calling - parsing out model completion calls
pass
def embedding(
self,
): # logic for parsing in - calling - parsing out model embedding calls
pass

View file

@@ -2,39 +2,60 @@
import os, json
from enum import Enum
import requests
from litellm import logging
import time
import time
from typing import Callable
from litellm.utils import ModelResponse
from typing import Optional
class HuggingfaceError(Exception):
def __init__(self, status_code, message):
self.status_code = status_code
self.message = message
super().__init__(self.message) # Call the base class constructor with the parameters it needs
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
class HuggingfaceRestAPILLM():
def __init__(self, encoding, api_key=None) -> None:
class HuggingfaceRestAPILLM:
def __init__(self, encoding, logging_obj, api_key=None) -> None:
self.encoding = encoding
self.logging_obj = logging_obj
self.validate_environment(api_key=api_key)
def validate_environment(self, api_key): # set up the environment required to run the model
def validate_environment(
self, api_key
): # set up the environment required to run the model
self.headers = {
"content-type": "application/json",
}
# get the api key if it exists in the environment or is passed in, but don't require it
self.api_key = os.getenv("HF_TOKEN") if "HF_TOKEN" in os.environ else api_key
self.api_key = api_key
if self.api_key != None:
self.headers["Authorization"] = f"Bearer {self.api_key}"
self.headers["Authorization"] = f"Bearer {self.api_key}"
def completion(self, model: str, messages: list, custom_api_base: str, model_response: dict, print_verbose: Callable, optional_params=None, litellm_params=None, logger_fn=None): # logic for parsing in - calling - parsing out model completion calls
def completion(
self,
model: str,
messages: list,
custom_api_base: str,
model_response: ModelResponse,
print_verbose: Callable,
optional_params=None,
litellm_params=None,
logger_fn=None,
): # logic for parsing in - calling - parsing out model completion calls
completion_url: str = ""
if custom_api_base:
completion_url = custom_api_base
elif "HF_API_BASE" in os.environ:
completion_url = os.getenv("HF_API_BASE")
completion_url = os.getenv("HF_API_BASE", "")
else:
completion_url = f"https://api-inference.huggingface.co/models/{model}"
prompt = ""
if "meta-llama" in model and "chat" in model: # use the required special tokens for meta-llama - https://huggingface.co/blog/llama2#how-to-prompt-llama-2
if (
"meta-llama" in model and "chat" in model
): # use the required special tokens for meta-llama - https://huggingface.co/blog/llama2#how-to-prompt-llama-2
prompt = "<s>"
for message in messages:
if message["role"] == "system":
@@ -46,49 +67,60 @@ class HuggingfaceRestAPILLM():
else:
for message in messages:
prompt += f"{message['content']}"
### MAP INPUT PARAMS
# max tokens
### MAP INPUT PARAMS
# max tokens
if "max_tokens" in optional_params:
value = optional_params.pop("max_tokens")
optional_params["max_new_tokens"] = value
data = {
"inputs": prompt,
# "parameters": optional_params
"parameters": optional_params
}
## LOGGING
logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params}, logger_fn=logger_fn)
self.logging_obj.pre_call(input=prompt, api_key=self.api_key, additional_args={"complete_input_dict": data})
## COMPLETION CALL
response = requests.post(completion_url, headers=self.headers, data=json.dumps(data))
response = requests.post(
completion_url, headers=self.headers, data=json.dumps(data)
)
if "stream" in optional_params and optional_params["stream"] == True:
return response.iter_lines()
else:
## LOGGING
logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params, "original_response": response.text}, logger_fn=logger_fn)
print_verbose(f"raw model_response: {response.text}")
self.logging_obj.post_call(input=prompt, api_key=self.api_key, original_response=response.text, additional_args={"complete_input_dict": data})
## RESPONSE OBJECT
completion_response = response.json()
print_verbose(f"response: {completion_response}")
if isinstance(completion_response, dict) and "error" in completion_response:
print_verbose(f"completion error: {completion_response['error']}")
print_verbose(f"response.status_code: {response.status_code}")
raise HuggingfaceError(message=completion_response["error"], status_code=response.status_code)
raise HuggingfaceError(
message=completion_response["error"],
status_code=response.status_code,
)
else:
model_response["choices"][0]["message"]["content"] = completion_response[0]["generated_text"]
model_response["choices"][0]["message"][
"content"
] = completion_response[0]["generated_text"]
## CALCULATING USAGE
prompt_tokens = len(self.encoding.encode(prompt)) ##[TODO] use the llama2 tokenizer here
completion_tokens = len(self.encoding.encode(model_response["choices"][0]["message"]["content"])) ##[TODO] use the llama2 tokenizer here
prompt_tokens = len(
self.encoding.encode(prompt)
) ##[TODO] use the llama2 tokenizer here
completion_tokens = len(
self.encoding.encode(model_response["choices"][0]["message"]["content"])
) ##[TODO] use the llama2 tokenizer here
model_response["created"] = time.time()
model_response["model"] = model
model_response["usage"] = {
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
"total_tokens": prompt_tokens + completion_tokens
}
"total_tokens": prompt_tokens + completion_tokens,
}
return model_response
pass
def embedding(): # logic for parsing in - calling - parsing out model embedding calls
pass
def embedding(
self,
): # logic for parsing in - calling - parsing out model embedding calls
pass
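Editor's note: a hedged sketch of the parameter mapping done above before calling the Hugging Face Inference API, where the OpenAI-style max_tokens argument is renamed to the max_new_tokens field; the prompt and values are illustrative.

optional_params = {"max_tokens": 256, "temperature": 0.7}  # hypothetical caller params

if "max_tokens" in optional_params:
    optional_params["max_new_tokens"] = optional_params.pop("max_tokens")

data = {
    "inputs": "Hello, how are you?",   # hypothetical prompt built from the messages
    "parameters": optional_params,
}
print(data)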

File diff suppressed because it is too large

137
litellm/testing.py Normal file
View file

@@ -0,0 +1,137 @@
import litellm
import time
from concurrent.futures import ThreadPoolExecutor
import traceback
def testing_batch_completion(*args, **kwargs):
try:
batch_models = (
args[0] if len(args) > 0 else kwargs.pop("models")
) ## expected input format- ["gpt-3.5-turbo", {"model": "qvv0xeq", "custom_llm_provider"="baseten"}...]
batch_messages = args[1] if len(args) > 1 else kwargs.pop("messages")
results = []
completions = []
exceptions = []
times = []
with ThreadPoolExecutor() as executor:
for model in batch_models:
kwargs_modified = dict(kwargs)
args_modified = list(args)
if len(args) > 0:
args_modified[0] = model["model"]
else:
kwargs_modified["model"] = (
model["model"]
if isinstance(model, dict) and "model" in model
else model
) # if model is a dictionary get it's value else assume it's a string
kwargs_modified["custom_llm_provider"] = (
model["custom_llm_provider"]
if isinstance(model, dict) and "custom_llm_provider" in model
else None
)
kwargs_modified["custom_api_base"] = (
model["custom_api_base"]
if isinstance(model, dict) and "custom_api_base" in model
else None
)
for message_list in batch_messages:
if len(args) > 1:
args_modified[1] = message_list
future = executor.submit(
litellm.completion, *args_modified, **kwargs_modified
)
else:
kwargs_modified["messages"] = message_list
future = executor.submit(
litellm.completion, *args_modified, **kwargs_modified
)
completions.append((future, message_list))
# Retrieve the results and calculate elapsed time for each completion call
for completion in completions:
future, message_list = completion
start_time = time.time()
try:
result = future.result()
end_time = time.time()
elapsed_time = end_time - start_time
result_dict = {
"status": "succeeded",
"response": future.result(),
"prompt": message_list,
"response_time": elapsed_time,
}
results.append(result_dict)
except Exception as e:
end_time = time.time()
elapsed_time = end_time - start_time
result_dict = {
"status": "failed",
"response": e,
"response_time": elapsed_time,
}
results.append(result_dict)
return results
except:
traceback.print_exc()
def duration_test_model(original_function):
def wrapper_function(*args, **kwargs):
# Code to be executed before the original function
duration = kwargs.pop("duration", None)
interval = kwargs.pop("interval", None)
results = []
if duration and interval:
start_time = time.time()
end_time = start_time + duration # default to 1hr duration
while time.time() < end_time:
result = original_function(*args, **kwargs)
results.append(result)
time.sleep(interval)
else:
result = original_function(*args, **kwargs)
results = result
return results
# Return the wrapper function
return wrapper_function
@duration_test_model
def load_test_model(models: list, prompt: str = "", num_calls: int = 0):
test_calls = 100
if num_calls:
test_calls = num_calls
input_prompt = prompt if prompt else "Hey, how's it going?"
messages = (
[{"role": "user", "content": prompt}]
if prompt
else [{"role": "user", "content": input_prompt}]
)
full_message_list = [
messages for _ in range(test_calls)
] # call it as many times as set by user to load test models
start_time = time.time()
try:
results = testing_batch_completion(models=models, messages=full_message_list)
end_time = time.time()
response_time = end_time - start_time
return {
"total_response_time": response_time,
"calls_made": test_calls,
"prompt": input_prompt,
"results": results,
}
except Exception as e:
traceback.print_exc()
end_time = time.time()
response_time = end_time - start_time
return {
"total_response_time": response_time,
"calls_made": test_calls,
"prompt": input_prompt,
"exception": e,
}
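Editor's note: a hedged usage sketch for the new litellm/testing.py helpers above, assuming the module is importable as litellm.testing and that API keys for the listed providers are set. The duration/interval kwargs are consumed by the duration_test_model decorator, so one call can either run once or loop for a fixed window.

from litellm.testing import load_test_model

# One-shot load test: 5 calls each against two models.
results = load_test_model(
    models=["gpt-3.5-turbo", "command-nightly"],
    prompt="Hey, how's it going?",
    num_calls=5,
)

# Repeated load test: rerun the same batch every 60 seconds for 10 minutes.
timed_results = load_test_model(
    models=["gpt-3.5-turbo"],
    prompt="Hey, how's it going?",
    num_calls=5,
    duration=600,
    interval=60,
)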

View file

@@ -3,39 +3,51 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
litellm.set_verbose = False
def logger_fn(model_call_object: dict):
print(f"model call details: {model_call_object}")
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
## Test 1: Setting key dynamically
temp_key = os.environ.get("ANTHROPIC_API_KEY")
temp_key = os.environ.get("ANTHROPIC_API_KEY", "")
os.environ["ANTHROPIC_API_KEY"] = "bad-key"
# test on openai completion call
# test on openai completion call
try:
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn, api_key=temp_key)
response = completion(
model="claude-instant-1",
messages=messages,
logger_fn=logger_fn,
api_key=temp_key,
)
print(f"response: {response}")
except:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")
pass
os.environ["ANTHROPIC_API_KEY"] = temp_key
## Test 2: Setting key via __init__ params
litellm.anthropic_key = os.environ.get("ANTHROPIC_API_KEY")
litellm.anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
os.environ.pop("ANTHROPIC_API_KEY")
# test on openai completion call
# test on openai completion call
try:
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
response = completion(
model="claude-instant-1", messages=messages, logger_fn=logger_fn
)
print(f"response: {response}")
except:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")
pass
os.environ["ANTHROPIC_API_KEY"] = temp_key

View file

@@ -5,17 +5,22 @@ import sys, os
import pytest
import traceback
import asyncio
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from litellm import acompletion
async def test_get_response():
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
try:
response = await acompletion(model="gpt-3.5-turbo", messages=messages)
except Exception as e:
pytest.fail(f"error occurred: {e}")
return response
response = asyncio.run(test_get_response())
print(response)
print(response)

View file

@@ -1,16 +1,17 @@
#### What this tests ####
# This tests chaos monkeys - if random parts of the system are broken / things aren't sent correctly - what happens.
# Expect to add more edge cases to this over time.
# Expect to add more edge cases to this over time.
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
# Get the current directory of the script
current_dir = os.path.dirname(os.path.abspath(__file__))
# Get the parent directory by joining the current directory with '..'
parent_dir = os.path.join(current_dir, '../..')
parent_dir = os.path.join(current_dir, "../..")
# Add the parent directory to the system path
sys.path.append(parent_dir)
@@ -26,7 +27,7 @@ litellm.failure_callback = ["slack", "sentry", "posthog"]
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
model_val = None
@@ -35,18 +36,18 @@ def test_completion_with_empty_model():
try:
response = completion(model=model_val, messages=messages)
except Exception as e:
print(f"error occurred: {e}")
print(f"error occurred: {e}")
pass
#bad key
# bad key
temp_key = os.environ.get("OPENAI_API_KEY")
os.environ["OPENAI_API_KEY"] = "bad-key"
# test on openai completion call
# test on openai completion call
try:
response = completion(model="gpt-3.5-turbo", messages=messages)
print(f"response: {response}")
except:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")
pass
os.environ["OPENAI_API_KEY"] = temp_key
os.environ["OPENAI_API_KEY"] = str(temp_key) # this passes linting#5

View file

@@ -3,7 +3,10 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import batch_completion
@ -14,4 +17,4 @@ model = "gpt-3.5-turbo"
result = batch_completion(model=model, messages=messages)
print(result)
print(len(result))
print(len(result))

View file

@@ -19,7 +19,7 @@
# #openai call
# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
# #bad request call
# response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])
# response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])

View file

@@ -0,0 +1,52 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion
messages = [{"role": "user", "content": "who is ishaan Github? "}]
# test if response cached
def test_caching():
try:
litellm.caching = True
response1 = completion(model="gpt-3.5-turbo", messages=messages)
response2 = completion(model="gpt-3.5-turbo", messages=messages)
print(f"response1: {response1}")
print(f"response2: {response2}")
litellm.caching = False
if response2 != response1:
print(f"response1: {response1}")
print(f"response2: {response2}")
pytest.fail(f"Error occurred: {e}")
except Exception as e:
litellm.caching = False
print(f"error occurred: {traceback.format_exc()}")
pytest.fail(f"Error occurred: {e}")
def test_caching_with_models():
litellm.caching_with_models = True
response2 = completion(model="gpt-3.5-turbo", messages=messages)
response3 = completion(model="command-nightly", messages=messages)
print(f"response2: {response2}")
print(f"response3: {response3}")
litellm.caching_with_models = False
if response3 == response2:
# if models are different, it should not return cached response
print(f"response2: {response2}")
print(f"response3: {response3}")
pytest.fail(f"Error occurred: {e}")

View file

@@ -5,7 +5,9 @@ import sys, os
import traceback
import pytest
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
@@ -14,53 +16,71 @@ litellm.failure_callback = ["slack", "sentry", "posthog"]
litellm.set_verbose = True
def logger_fn(model_call_object: dict):
# print(f"model call details: {model_call_object}")
pass
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
def test_completion_openai():
try:
print("running query")
response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn)
response = completion(
model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn
)
print(f"response: {response}")
# Add any assertions here to check the response
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
test_completion_openai()
def test_completion_claude():
try:
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
response = completion(
model="claude-instant-1", messages=messages, logger_fn=logger_fn
)
# Add any assertions here to check the response
except Exception as e:
pytest.fail(f"Error occurred: {e}")
test_completion_claude()
def test_completion_non_openai():
try:
response = completion(model="command-nightly", messages=messages, logger_fn=logger_fn)
response = completion(
model="command-nightly", messages=messages, logger_fn=logger_fn
)
# Add any assertions here to check the response
except Exception as e:
pytest.fail(f"Error occurred: {e}")
test_completion_non_openai()
def test_embedding_openai():
try:
response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn)
response = embedding(
model="text-embedding-ada-002", input=[user_message], logger_fn=logger_fn
)
# Add any assertions here to check the response
print(f"response: {str(response)[:50]}")
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_bad_azure_embedding():
try:
response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn)
response = embedding(
model="chatgpt-test", input=[user_message], logger_fn=logger_fn
)
# Add any assertions here to check the response
print(f"response: {str(response)[:50]}")
except Exception as e:
pass
# def test_good_azure_embedding():
# try:
# response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn)
@@ -68,4 +88,3 @@ def test_bad_azure_embedding():
# print(f"response: {str(response)[:50]}")
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")

View file

@@ -1,53 +1,79 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion
# from infisical import InfisicalClient
# litellm.set_verbose = True
# litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"])
user_message = "Hello, whats the weather in San Francisco??"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
def logger_fn(user_model_dict):
print(f"user_model_dict: {user_model_dict}")
def test_completion_claude():
def test_completion_custom_provider_model_name():
try:
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
response = completion(
model="together_ai/togethercomputer/llama-2-70b-chat", messages=messages, logger_fn=logger_fn
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
test_completion_custom_provider_model_name()
def test_completion_claude():
try:
response = completion(
model="claude-instant-1", messages=messages, logger_fn=logger_fn
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_claude_stream():
try:
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "how does a court case get to the Supreme Court?"}
{
"role": "user",
"content": "how does a court case get to the Supreme Court?",
},
]
response = completion(model="claude-2", messages=messages, stream=True)
# Add any assertions here to check the response
for chunk in response:
print(chunk['choices'][0]['delta']) # same as openai format
print(chunk["choices"][0]["delta"]) # same as openai format
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_hf_api():
try:
user_message = "write some code to find the sum of two numbers"
messages = [{ "content": user_message,"role": "user"}]
response = completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, custom_llm_provider="huggingface")
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# def test_completion_hf_api():
# try:
# user_message = "write some code to find the sum of two numbers"
# messages = [{ "content": user_message,"role": "user"}]
# response = completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, custom_llm_provider="huggingface")
# # Add any assertions here to check the response
# print(response)
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# def test_completion_hf_deployed_api():
# try:
@@ -62,65 +88,140 @@ def test_completion_hf_api():
def test_completion_cohere():
try:
response = completion(model="command-nightly", messages=messages, max_tokens=500)
response = completion(
model="command-nightly", messages=messages, max_tokens=100, logit_bias={40: 10}
)
# Add any assertions here to check the response
print(response)
response_str = response["choices"][0]["message"]["content"]
print(f"str response{response_str}")
response_str_2 = response.choices[0].message.content
if type(response_str) != str:
pytest.fail(f"Error occurred: {e}")
if type(response_str_2) != str:
pytest.fail(f"Error occurred: {e}")
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_cohere_stream():
try:
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "how does a court case get to the Supreme Court?"}
{
"role": "user",
"content": "how does a court case get to the Supreme Court?",
},
]
response = completion(model="command-nightly", messages=messages, stream=True, max_tokens=50)
response = completion(
model="command-nightly", messages=messages, stream=True, max_tokens=50
)
# Add any assertions here to check the response
for chunk in response:
print(chunk['choices'][0]['delta']) # same as openai format
print(chunk["choices"][0]["delta"]) # same as openai format
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai():
try:
response = completion(model="gpt-3.5-turbo", messages=messages)
response_str = response["choices"][0]["message"]["content"]
response_str_2 = response.choices[0].message.content
assert response_str == response_str_2
assert type(response_str) == str
assert len(response_str) > 1
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_text_openai():
try:
response = completion(model="text-davinci-003", messages=messages)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai_with_optional_params():
try:
response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.5, top_p=0.1, user="ishaan_dev@berri.ai")
response = completion(
model="gpt-3.5-turbo",
messages=messages,
temperature=0.5,
top_p=0.1,
user="ishaan_dev@berri.ai",
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openrouter():
try:
response = completion(model="google/palm-2-chat-bison", messages=messages, temperature=0.5, top_p=0.1, user="ishaan_dev@berri.ai")
response = completion(
model="google/palm-2-chat-bison",
messages=messages,
temperature=0.5,
top_p=0.1,
user="ishaan_dev@berri.ai",
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai_with_more_optional_params():
try:
response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.5, top_p=0.1, n=2, max_tokens=150, presence_penalty=0.5, frequency_penalty=-0.5, logit_bias={123: 5}, user="ishaan_dev@berri.ai")
response = completion(
model="gpt-3.5-turbo",
messages=messages,
temperature=0.5,
top_p=0.1,
n=2,
max_tokens=150,
presence_penalty=0.5,
frequency_penalty=-0.5,
logit_bias={123: 5},
user="ishaan_dev@berri.ai",
)
# Add any assertions here to check the response
print(response)
response_str = response["choices"][0]["message"]["content"]
response_str_2 = response.choices[0].message.content
print(response["choices"][0]["message"]["content"])
print(response.choices[0].message.content)
if type(response_str) != str:
pytest.fail(f"Error occurred: {e}")
if type(response_str_2) != str:
pytest.fail(f"Error occurred: {e}")
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai_with_stream():
try:
response = completion(
model="gpt-3.5-turbo",
messages=messages,
temperature=0.5,
top_p=0.1,
n=2,
max_tokens=150,
presence_penalty=0.5,
stream=True,
frequency_penalty=-0.5,
logit_bias={27000: 5},
user="ishaan_dev@berri.ai",
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai_with_stream():
try:
response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.5, top_p=0.1, n=2, max_tokens=150, presence_penalty=0.5, stream=True, frequency_penalty=-0.5, logit_bias={27000: 5}, user="ishaan_dev@berri.ai")
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai_with_functions():
function1 = [
@@ -132,33 +233,39 @@ def test_completion_openai_with_functions():
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA"
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {
"type": "string",
"enum": ["celsius", "fahrenheit"]
}
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"]
}
"required": ["location"],
},
}
]
try:
response = completion(model="gpt-3.5-turbo", messages=messages, functions=function1)
response = completion(
model="gpt-3.5-turbo", messages=messages, functions=function1
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_azure():
try:
response = completion(model="gpt-3.5-turbo", deployment_id="chatgpt-test", messages=messages, custom_llm_provider="azure")
response = completion(
model="gpt-3.5-turbo",
deployment_id="chatgpt-test",
messages=messages,
custom_llm_provider="azure",
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect.
# Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect.
def test_completion_replicate_llama_stream():
model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
try:
@@ -170,59 +277,69 @@ def test_completion_replicate_llama_stream():
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_replicate_stability_stream():
model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
try:
response = completion(model=model_name, messages=messages, stream=True, custom_llm_provider="replicate")
response = completion(
model=model_name,
messages=messages,
stream=True,
custom_llm_provider="replicate",
)
# Add any assertions here to check the response
for chunk in response:
print(chunk['choices'][0]['delta'])
print(chunk["choices"][0]["delta"])
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_replicate_stability():
model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
try:
response = completion(model=model_name, messages=messages, custom_llm_provider="replicate")
response = completion(
model=model_name, messages=messages, custom_llm_provider="replicate"
)
# Add any assertions here to check the response
for result in response:
print(result)
print(response)
response_str = response["choices"][0]["message"]["content"]
response_str_2 = response.choices[0].message.content
print(response_str)
print(response_str_2)
if type(response_str) != str:
pytest.fail(f"Error occurred: {e}")
if type(response_str_2) != str:
pytest.fail(f"Error occurred: {e}")
except Exception as e:
pytest.fail(f"Error occurred: {e}")
######## Test TogetherAI ########
def test_completion_together_ai():
model_name = "togethercomputer/llama-2-70b-chat"
try:
response = completion(model=model_name, messages=messages, custom_llm_provider="together_ai")
response = completion(model=model_name, messages=messages)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_together_ai_stream():
model_name = "togethercomputer/llama-2-70b-chat"
try:
response = completion(model=model_name, messages=messages, custom_llm_provider="together_ai", stream=True)
# Add any assertions here to check the response
print(response)
for chunk in response:
print(chunk['choices'][0]['delta']) # same as openai format
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_petals():
model_name = "stabilityai/StableBeluga2"
try:
response = completion(model=model_name, messages=messages, custom_llm_provider="petals", force_timeout=120)
response = completion(
model=model_name,
messages=messages,
custom_llm_provider="petals",
force_timeout=120,
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# def test_baseten_falcon_7bcompletion():
# model_name = "qvv0xeq"
# try:
@@ -270,7 +387,6 @@ def test_petals():
# pytest.fail(f"Error occurred: {e}")
#### Test A121 ###################
# def test_completion_ai21():
# model_name = "j2-light"
@@ -281,7 +397,7 @@ def test_petals():
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test config file with completion #
# test config file with completion #
# def test_completion_openai_config():
# try:
# litellm.config_path = "../config.json"
@@ -294,4 +410,22 @@ def test_petals():
# pytest.fail(f"Error occurred: {e}")
# import asyncio
# def test_completion_together_ai_stream():
# user_message = "Write 1pg about YC & litellm"
# messages = [{ "content": user_message,"role": "user"}]
# try:
# response = completion(model="togethercomputer/llama-2-70b-chat", messages=messages, stream=True, max_tokens=800)
# print(response)
# asyncio.run(get_response(response))
# # print(string_response)
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# async def get_response(generator):
# async for elem in generator:
# print(elem)
# return
# test_completion_together_ai_stream()
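Editor's note: the streaming tests above all consume the generator the same way; here is that pattern on its own, assuming a valid key for the chosen provider is set. The model and prompt are taken from the tests and are interchangeable.

from litellm import completion

messages = [{"role": "user", "content": "how does a court case get to the Supreme Court?"}]
response = completion(model="command-nightly", messages=messages, stream=True, max_tokens=50)

for chunk in response:
    print(chunk["choices"][0]["delta"])  # each streamed chunk follows the OpenAI delta format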

View file

@@ -1,20 +1,33 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
import litellm
from litellm import completion
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import completion
def logging_fn(model_call_dict):
print(f"model call details: {model_call_dict}")
models = ["gorilla-7b-hf-v1", "gpt-4"]
custom_llm_provider = None
messages = [{"role": "user", "content": "Hey, how's it going?"}]
for model in models: # iterate through list
for model in models: # iterate through list
custom_api_base = None
if model == "gorilla-7b-hf-v1":
if model == "gorilla-7b-hf-v1":
custom_llm_provider = "custom_openai"
custom_api_base = "http://zanino.millennium.berkeley.edu:8000/v1"
completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider, custom_api_base=custom_api_base, logger_fn=logging_fn)
completion(
model=model,
messages=messages,
custom_llm_provider=custom_llm_provider,
custom_api_base=custom_api_base,
logger_fn=logging_fn,
)

View file

@@ -1,20 +1,24 @@
import sys, os
import traceback
import pytest
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
from infisical import InfisicalClient
# litellm.set_verbose = True
litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"])
# # litellm.set_verbose = True
# litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"])
def test_openai_embedding():
try:
response = embedding(model='text-embedding-ada-002', input=["good morning from litellm"])
response = embedding(
model="text-embedding-ada-002", input=["good morning from litellm"]
)
# Add any assertions here to check the response
print(f"response: {str(response)}")
except Exception as e:
pytest.fail(f"Error occurred: {e}")
pytest.fail(f"Error occurred: {e}")

View file

@ -1,10 +1,21 @@
# from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, OpenAIError
import os
import os
import sys
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion, AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
from litellm import (
embedding,
completion,
AuthenticationError,
InvalidRequestError,
RateLimitError,
ServiceUnavailableError,
OpenAIError,
)
from concurrent.futures import ThreadPoolExecutor
import pytest
@ -23,8 +34,10 @@ litellm.failure_callback = ["sentry"]
# models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"]
test_model = "claude-instant-1"
models = ["claude-instant-1"]
def logging_fn(model_call_dict):
if "model" in model_call_dict:
if "model" in model_call_dict:
print(f"model_call_dict: {model_call_dict['model']}")
else:
print(f"model_call_dict: {model_call_dict}")
@ -38,13 +51,18 @@ def test_context_window(model):
try:
model = "chatgpt-test"
print(f"model: {model}")
response = completion(model=model, messages=messages, custom_llm_provider="azure", logger_fn=logging_fn)
response = completion(
model=model,
messages=messages,
custom_llm_provider="azure",
logger_fn=logging_fn,
)
print(f"response: {response}")
except InvalidRequestError:
print("InvalidRequestError")
except InvalidRequestError as e:
print(f"InvalidRequestError: {e.llm_provider}")
return
except OpenAIError:
print("OpenAIError")
except OpenAIError as e:
print(f"OpenAIError: {e.llm_provider}")
return
except Exception as e:
print("Uncaught Error in test_context_window")
@ -52,14 +70,17 @@ def test_context_window(model):
print(f"Uncaught Exception - {e}")
pytest.fail(f"Error occurred: {e}")
return
test_context_window(test_model)
# Test 2: InvalidAuth Errors
@pytest.mark.parametrize("model", models)
def invalid_auth(model): # set the model key to an invalid key, depending on the model
messages = [{ "content": "Hello, how are you?","role": "user"}]
def invalid_auth(model): # set the model key to an invalid key, depending on the model
messages = [{"content": "Hello, how are you?", "role": "user"}]
temporary_key = None
try:
try:
custom_llm_provider = None
if model == "gpt-3.5-turbo":
temporary_key = os.environ["OPENAI_API_KEY"]
@ -74,22 +95,29 @@ def invalid_auth(model): # set the model key to an invalid key, depending on the
elif model == "command-nightly":
temporary_key = os.environ["COHERE_API_KEY"]
os.environ["COHERE_API_KEY"] = "bad-key"
elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
temporary_key = os.environ["REPLICATE_API_KEY"]
elif (
model
== "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
):
temporary_key = os.environ["REPLICATE_API_KEY"]
os.environ["REPLICATE_API_KEY"] = "bad-key"
print(f"model: {model}")
response = completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider)
response = completion(
model=model, messages=messages, custom_llm_provider=custom_llm_provider
)
print(f"response: {response}")
except AuthenticationError as e:
print(f"AuthenticationError Caught Exception - {e}")
except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
print(f"AuthenticationError Caught Exception - {e.llm_provider}")
except (
OpenAIError
): # is at least an openai error -> in case of random model errors - e.g. overloaded server
print(f"OpenAIError Caught Exception - {e}")
except Exception as e:
print(type(e))
print(e.__class__.__name__)
print(f"Uncaught Exception - {e}")
pytest.fail(f"Error occurred: {e}")
if temporary_key != None: # reset the key
if temporary_key != None: # reset the key
if model == "gpt-3.5-turbo":
os.environ["OPENAI_API_KEY"] = temporary_key
elif model == "chatgpt-test":
@ -99,13 +127,18 @@ def invalid_auth(model): # set the model key to an invalid key, depending on the
os.environ["ANTHROPIC_API_KEY"] = temporary_key
elif model == "command-nightly":
os.environ["COHERE_API_KEY"] = temporary_key
elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
elif (
model
== "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
):
os.environ["REPLICATE_API_KEY"] = temporary_key
return
invalid_auth(test_model)
# # Test 3: Rate Limit Errors
# # Test 3: Rate Limit Errors
# def test_model(model):
# try:
# try:
# sample_text = "how does a court case get to the Supreme Court?" * 50000
# messages = [{ "content": sample_text,"role": "user"}]
# custom_llm_provider = None
@ -142,5 +175,3 @@ invalid_auth(test_model)
# accuracy_score = counts[True]/(counts[True] + counts[False])
# print(f"accuracy_score: {accuracy_score}")

View file

@ -5,7 +5,9 @@ import sys, os
import traceback
import pytest
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
@ -14,11 +16,15 @@ litellm.success_callback = ["helicone"]
litellm.set_verbose = True
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
# openai call
response = completion(
model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]
)
#cohere call
response = completion(model="command-nightly", messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}])
# cohere call
response = completion(
model="command-nightly", messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}]
)

View file

@ -0,0 +1,26 @@
# #### What this tests ####
# # This tests if logging to the litedebugger integration actually works
# # pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
# import sys, os
# import traceback
# import pytest
# sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
# import litellm
# from litellm import embedding, completion
# litellm.input_callback = ["lite_debugger"]
# litellm.success_callback = ["lite_debugger"]
# litellm.failure_callback = ["lite_debugger"]
# litellm.set_verbose = True
# user_message = "Hello, how are you?"
# messages = [{ "content": user_message,"role": "user"}]
# #openai call
# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
# #bad request call
# response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])

View file

@ -1,9 +1,37 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
import litellm
from litellm import load_test_model
model="gpt-3.5-turbo"
result = load_test_model(model=model, num_calls=5)
print(result)
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import load_test_model, testing_batch_completion
# ## Load Test Model
# model="gpt-3.5-turbo"
# result = load_test_model(model=model, num_calls=5)
# print(result)
# print(len(result["results"]))
# ## Duration Test Model
# model="gpt-3.5-turbo"
# result = load_test_model(model=model, num_calls=5, duration=15, interval=15) # duration test the model for 2 minutes, sending 5 calls every 15s
# print(result)
## Quality Test across Model
models = [
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-4",
"claude-instant-1",
{
"model": "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781",
"custom_llm_provider": "replicate",
},
]
messages = [
[{"role": "user", "content": "What is your name?"}],
[{"role": "user", "content": "Hey, how's it going?"}],
]
result = testing_batch_completion(models=models, messages=messages)
print(result)

View file

@ -3,7 +3,10 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
@ -11,49 +14,53 @@ litellm.set_verbose = False
score = 0
def logger_fn(model_call_object: dict):
print(f"model call details: {model_call_object}")
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
# test on openai completion call
user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]
# test on openai completion call
try:
response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn)
score +=1
score += 1
except:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")
pass
# test on non-openai completion call
# test on non-openai completion call
try:
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
response = completion(
model="claude-instant-1", messages=messages, logger_fn=logger_fn
)
print(f"claude response: {response}")
score +=1
score += 1
except:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")
pass
# # test on openai embedding call
# try:
# # test on openai embedding call
# try:
# response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn)
# score +=1
# score +=1
# except:
# traceback.print_exc()
# # test on bad azure openai embedding call -> missing azure flag and this isn't an embedding model
# try:
# try:
# response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn)
# except:
# score +=1 # expect this to fail
# traceback.print_exc()
# # test on good azure openai embedding call
# try:
# # test on good azure openai embedding call
# try:
# response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn)
# score +=1
# score +=1
# except:
# traceback.print_exc()
# print(f"Score: {score}, Overall score: {score/5}")
# print(f"Score: {score}, Overall score: {score/5}")

View file

@ -3,7 +3,10 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
@ -15,11 +18,11 @@ litellm.set_verbose = True
model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
for model in model_fallback_list:
try:
response = embedding(model="text-embedding-ada-002", input=[user_message])
response = completion(model=model, messages=messages)
except Exception as e:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")

View file

@ -0,0 +1,23 @@
# #### What this tests ####
# # This tests if the litellm model response type is returnable in a flask app
# import sys, os
# import traceback
# from flask import Flask, request, jsonify, abort, Response
# sys.path.insert(0, os.path.abspath('../../..')) # Adds the parent directory to the system path
# import litellm
# from litellm import completion
# litellm.set_verbose = False
# app = Flask(__name__)
# @app.route('/')
# def hello():
# data = request.json
# return completion(**data)
# if __name__ == '__main__':
# from waitress import serve
# serve(app, host='localhost', port=8080, threads=10)

View file

@ -0,0 +1,14 @@
# import requests, json
# BASE_URL = 'http://localhost:8080'
# def test_hello_route():
# data = {"model": "claude-instant-1", "messages": [{"role": "user", "content": "hey, how's it going?"}]}
# headers = {'Content-Type': 'application/json'}
# response = requests.get(BASE_URL, headers=headers, data=json.dumps(data))
# print(response.text)
# assert response.status_code == 200
# print("Hello route test passed!")
# if __name__ == '__main__':
# test_hello_route()

View file

@ -4,7 +4,10 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
@ -13,11 +16,11 @@ litellm.set_verbose = True
model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
for model in model_fallback_list:
try:
response = embedding(model="text-embedding-ada-002", input=[user_message])
response = completion(model=model, messages=messages)
except Exception as e:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")

View file

@ -53,7 +53,6 @@
# # # return this generator to the client for streaming requests
# # async def get_response():
# # global generator
# # async for elem in generator:

View file

@ -12,7 +12,6 @@
# import asyncio
# user_message = "respond in 20 words. who are you?"
# messages = [{ "content": user_message,"role": "user"}]
@ -45,8 +44,3 @@
# pytest.fail(f"Error occurred: {e}")
# test_completion_ollama_stream()

View file

@ -4,7 +4,10 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
from infisical import InfisicalClient
@ -15,15 +18,8 @@ infisical_token = os.environ["INFISICAL_TOKEN"]
litellm.secret_manager_client = InfisicalClient(token=infisical_token)
user_message = "Hello, whats the weather in San Francisco??"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
def test_completion_azure():
try:
response = completion(model="gpt-3.5-turbo", deployment_id="chatgpt-test", messages=messages, custom_llm_provider="azure")
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai():
try:
@ -31,12 +27,9 @@ def test_completion_openai():
# Add any assertions here to check the response
print(response)
except Exception as e:
litellm.secret_manager_client = None
pytest.fail(f"Error occurred: {e}")
litellm.secret_manager_client = None
def test_completion_openai_with_optional_params():
try:
response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.5, top_p=0.1, user="ishaan_dev@berri.ai")
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
test_completion_openai()

View file

@ -3,7 +3,10 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import completion
@ -11,29 +14,40 @@ litellm.set_verbose = False
score = 0
def logger_fn(model_call_object: dict):
print(f"model call details: {model_call_object}")
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
# test on anthropic completion call
user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]
# test on anthropic completion call
try:
response = completion(model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn)
response = completion(
model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
)
for chunk in response:
print(chunk['choices'][0]['delta'])
score +=1
print(chunk["choices"][0]["delta"])
score += 1
except:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")
pass
# test on anthropic completion call
# test on anthropic completion call
try:
response = completion(model="meta-llama/Llama-2-7b-chat-hf", messages=messages, custom_llm_provider="huggingface", custom_api_base="https://s7c7gytn18vnu4tw.us-east-1.aws.endpoints.huggingface.cloud", stream=True, logger_fn=logger_fn)
response = completion(
model="meta-llama/Llama-2-7b-chat-hf",
messages=messages,
custom_llm_provider="huggingface",
custom_api_base="https://s7c7gytn18vnu4tw.us-east-1.aws.endpoints.huggingface.cloud",
stream=True,
logger_fn=logger_fn,
)
for chunk in response:
print(chunk['choices'][0]['delta'])
score +=1
print(chunk["choices"][0]["delta"])
score += 1
except:
print(f"error occurred: {traceback.format_exc()}")
pass
print(f"error occurred: {traceback.format_exc()}")
pass

View file

@ -1,5 +1,5 @@
# #### What this tests ####
# # This tests if logging to the helicone integration actually works
# # This tests if logging to the supabase integration actually works
# # pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
# import sys, os
# import traceback
@ -9,10 +9,11 @@
# import litellm
# from litellm import embedding, completion
# litellm.input_callback = ["supabase"]
# litellm.success_callback = ["supabase"]
# litellm.failure_callback = ["supabase"]
# litellm.modify_integration("supabase",{"table_name": "litellm_logs"})
# # litellm.modify_integration("supabase",{"table_name": "test_table"})
# litellm.set_verbose = True
@ -21,7 +22,7 @@
# #openai call
# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
# #bad request call
# response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])
# response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])

View file

@ -3,10 +3,14 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import time
from litellm import timeout
@timeout(10)
def stop_after_10_s(force_timeout=60):
print("Stopping after 10 seconds")
@ -14,14 +18,14 @@ def stop_after_10_s(force_timeout=60):
return
start_time = time.time()
start_time = time.time()
try:
stop_after_10_s(force_timeout=1)
stop_after_10_s(force_timeout=1)
except Exception as e:
print(e)
pass
print(e)
pass
end_time = time.time()
print(f"total time: {end_time-start_time}")
print(f"total time: {end_time-start_time}")

View file

@ -49,4 +49,4 @@
# # chat = chat_model.start_chat()
# # response = chat.send_message("who are u? write a sentence", **parameters)
# # print(f"Response from Model: {response.text}")
# # print(f"Response from Model: {response.text}")

View file

@ -11,9 +11,7 @@ from threading import Thread
from openai.error import Timeout
def timeout(
timeout_duration: float = None, exception_to_raise = Timeout
):
def timeout(timeout_duration: float = 0.0, exception_to_raise=Timeout):
"""
Wraps a function to raise the specified exception if execution time
is greater than the specified timeout.
@ -44,7 +42,9 @@ def timeout(
result = future.result(timeout=local_timeout_duration)
except futures.TimeoutError:
thread.stop_loop()
raise exception_to_raise(f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s).")
raise exception_to_raise(
f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s)."
)
thread.stop_loop()
return result
@ -59,7 +59,9 @@ def timeout(
)
return value
except asyncio.TimeoutError:
raise exception_to_raise(f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s).")
raise exception_to_raise(
f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s)."
)
if iscoroutinefunction(func):
return async_wrapper
@ -80,4 +82,4 @@ class _LoopWrapper(Thread):
def stop_loop(self):
for task in asyncio.all_tasks(self.loop):
task.cancel()
self.loop.call_soon_threadsafe(self.loop.stop)
self.loop.call_soon_threadsafe(self.loop.stop)

File diff suppressed because it is too large Load diff

914
poetry.lock generated

File diff suppressed because it is too large Load diff

86
proxy-server/main.py Normal file
View file

@ -0,0 +1,86 @@
from flask import Flask, request, jsonify, abort, Response
from flask_cors import CORS
import traceback
import litellm
from litellm import completion
import openai
from utils import handle_error, get_cache, add_cache
import os, dotenv
import logging
import json
dotenv.load_dotenv()
# TODO: set your keys in .env or here:
# os.environ["OPENAI_API_KEY"] = "" # set your openai key here
# see supported models / keys here: https://litellm.readthedocs.io/en/latest/supported/
######### LOGGING ###################
# log your data to slack, supabase
litellm.success_callback=["slack", "supabase"] # set .env SLACK_API_TOKEN, SLACK_API_SECRET, SLACK_API_CHANNEL, SUPABASE
######### ERROR MONITORING ##########
# log errors to slack, sentry, supabase
litellm.failure_callback=["slack", "sentry", "supabase"] # .env SENTRY_API_URL
app = Flask(__name__)
CORS(app)
@app.route('/')
def index():
return 'received!', 200
def data_generator(response):
for chunk in response:
yield f"data: {json.dumps(chunk)}\n\n"
@app.route('/chat/completions', methods=["POST"])
def api_completion():
data = request.json
if data.get('stream') == "True":
data['stream'] = True # convert to boolean
try:
# pass in data to completion function, unpack data
response = completion(**data)
        if 'stream' in data and data['stream'] == True: # stream responses back via data_generator
return Response(data_generator(response), mimetype='text/event-stream')
except Exception as e:
# call handle_error function
print(f"got error{e}")
return handle_error(data)
return response, 200 # non streaming responses
@app.route('/get_models', methods=["POST"])
def get_models():
try:
return litellm.model_list
except Exception as e:
traceback.print_exc()
response = {"error": str(e)}
return response, 200
if __name__ == "__main__":
from waitress import serve
serve(app, host="0.0.0.0", port=5000, threads=500)
############### Advanced ##########################
############ Caching ###################################
# make a new endpoint with caching
# This Cache is built using ChromaDB
# it has two functions add_cache() and get_cache()
@app.route('/chat/completions_with_cache', methods=["POST"])
def api_completion_with_cache():
data = request.json
try:
cache_response = get_cache(data['messages'])
        if cache_response is not None:
return cache_response
# pass in data to completion function, unpack data
response = completion(**data)
# add to cache
except Exception as e:
# call handle_error function
return handle_error(data)
return response, 200
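
`main.py` above imports `handle_error`, `get_cache`, and `add_cache` from a `utils` module that is not included in this diff, and the comments describe the cache as ChromaDB-backed with exactly those two functions. As a rough illustration only — the collection name, the prompt serialization, and the `0.3` distance threshold below are assumptions, not the repo's code — the helpers might look something like this:

```python
# Hypothetical sketch of the ChromaDB-backed cache helpers imported by main.py.
# utils.py is not part of this diff; the collection name and the distance
# threshold are assumptions made for illustration.
import uuid
import chromadb

cache_client = chromadb.Client()
cache_collection = cache_client.create_collection("litellm_response_cache")


def add_cache(messages, response):
    # store the concatenated prompt as the document and keep the response alongside it
    prompt = " ".join(m["content"] for m in messages)
    cache_collection.add(
        documents=[prompt],
        metadatas=[{"response": str(response)}],
        ids=[str(uuid.uuid4())],
    )


def get_cache(messages):
    # return a cached response when a semantically similar prompt was seen before
    prompt = " ".join(m["content"] for m in messages)
    try:
        results = cache_collection.query(query_texts=[prompt], n_results=1)
        if results["documents"][0] and results["distances"][0][0] < 0.3:
            return results["metadatas"][0][0]["response"]
    except Exception:
        pass
    return None
```

The default chromadb client keeps everything in memory, so a real deployment would likely configure persistence and tune the similarity threshold.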

168
proxy-server/readme.md Normal file
View file

@ -0,0 +1,168 @@
# liteLLM Proxy Server: 50+ LLM Models, Error Handling, Caching
### Azure, Llama2, OpenAI, Claude, Hugging Face, Replicate Models
[![PyPI Version](https://img.shields.io/pypi/v/litellm.svg)](https://pypi.org/project/litellm/)
[![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.345-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/)
![Downloads](https://img.shields.io/pypi/dm/litellm)
[![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere%7CReplicate%7CHugging%20Face-blue?color=green)](https://github.com/BerriAI/litellm)
[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/DYqQAW?referralCode=t3ukrU)
![4BC6491E-86D0-4833-B061-9F54524B2579](https://github.com/BerriAI/litellm/assets/17561003/f5dd237b-db5e-42e1-b1ac-f05683b1d724)
## What does liteLLM proxy do
- Make `/chat/completions` requests for 50+ LLM models **Azure, OpenAI, Replicate, Anthropic, Hugging Face**
Example: for `model` use `claude-2`, `gpt-3.5`, `gpt-4`, `command-nightly`, `stabilityai/stablecode-completion-alpha-3b-4k`
```json
{
"model": "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1",
"messages": [
{
"content": "Hello, whats the weather in San Francisco??",
"role": "user"
}
]
}
```
- **Consistent Input/Output** Format
- Call all models using the OpenAI format - `completion(model, messages)`
- Text responses will always be available at `['choices'][0]['message']['content']`
- **Error Handling** Using Model Fallbacks (if `GPT-4` fails, try `llama2`)
- **Logging** - Log Requests, Responses and Errors to `Supabase`, `Posthog`, `Mixpanel`, `Sentry`, `Helicone` (any of the supported providers here: https://litellm.readthedocs.io/en/latest/advanced/)
**Example: Logs sent to Supabase**
<img width="1015" alt="Screenshot 2023-08-11 at 4 02 46 PM" src="https://github.com/ishaan-jaff/proxy-server/assets/29436595/237557b8-ba09-4917-982c-8f3e1b2c8d08">
- **Token Usage & Spend** - Track Input + Completion tokens used + Spend/model
- **Caching** - Implementation of Semantic Caching
- **Streaming & Async Support** - Return generators to stream text responses (a client-side sketch follows this list)
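
Because the proxy's `/chat/completions` route streams chunks as server-sent events via `data_generator` (see `proxy-server/main.py` earlier in this diff), a client can consume the stream along these lines — a sketch that assumes the proxy is running locally on port 5000:

```python
# Sketch: consuming the proxy's streaming /chat/completions route.
# Assumes the proxy from proxy-server/main.py is running on http://localhost:5000.
import json
import requests

payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hello, whats the weather in San Francisco??"}],
    "stream": True,
}

with requests.post("http://localhost:5000/chat/completions", json=payload, stream=True) as resp:
    for line in resp.iter_lines():
        if not line:
            continue
        decoded = line.decode("utf-8")
        if decoded.startswith("data: "):
            chunk = json.loads(decoded[len("data: "):])
            # chunks follow the OpenAI streaming format
            print(chunk["choices"][0]["delta"])
```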
## API Endpoints
### `/chat/completions` (POST)
This endpoint is used to generate chat completions for 50+ supported LLM API models. Use llama2, GPT-4, Claude2, etc.
#### Input
This API endpoint accepts a raw JSON body and expects the following inputs:
- `model` (string, required): ID of the model to use for chat completions. See all supported models [here](https://litellm.readthedocs.io/en/latest/supported/):
eg `gpt-3.5-turbo`, `gpt-4`, `claude-2`, `command-nightly`, `stabilityai/stablecode-completion-alpha-3b-4k`
- `messages` (array, required): A list of messages representing the conversation context. Each message should have a `role` (system, user, assistant, or function), `content` (message text), and `name` (for function role).
- Additional Optional parameters: `temperature`, `functions`, `function_call`, `top_p`, `n`, `stream`. See the full list of supported inputs here: https://litellm.readthedocs.io/en/latest/input/
#### Example JSON body
For claude-2
```json
{
"model": "claude-2",
"messages": [
{
"content": "Hello, whats the weather in San Francisco??",
"role": "user"
}
]
}
```
### Making an API request to the Proxy Server
```python
import requests
import json
# TODO: use your URL
url = "http://localhost:5000/chat/completions"
payload = json.dumps({
"model": "gpt-3.5-turbo",
"messages": [
{
"content": "Hello, whats the weather in San Francisco??",
"role": "user"
}
]
})
headers = {
'Content-Type': 'application/json'
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
```
### Output [Response Format]
All responses from the server are returned in the following format, for every LLM model. More info on the output format: https://litellm.readthedocs.io/en/latest/output/
```json
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "I'm sorry, but I don't have the capability to provide real-time weather information. However, you can easily check the weather in San Francisco by searching online or using a weather app on your phone.",
"role": "assistant"
}
}
],
"created": 1691790381,
"id": "chatcmpl-7mUFZlOEgdohHRDx2UpYPRTejirzb",
"model": "gpt-3.5-turbo-0613",
"object": "chat.completion",
"usage": {
"completion_tokens": 41,
"prompt_tokens": 16,
"total_tokens": 57
}
}
```
## Installation & Usage
### Running Locally
1. Clone liteLLM repository to your local machine:
```
git clone https://github.com/BerriAI/liteLLM-proxy
```
2. Install the required dependencies using pip
```
pip install -r requirements.txt
```
3. Set your LLM API keys
```
os.environ['OPENAI_API_KEY'] = "YOUR_API_KEY"
or
set OPENAI_API_KEY in your .env file
```
4. Run the server:
```
python main.py
```
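
Once the server is running, a quick smoke test against the `/` and `/get_models` routes defined in `proxy-server/main.py` could look like the following sketch (it assumes the default `waitress` host/port of `0.0.0.0:5000`):

```python
# Sketch: smoke test for the proxy routes defined in proxy-server/main.py.
# Assumes the server was started locally with `python main.py` (port 5000).
import requests

base_url = "http://localhost:5000"

# "/" should answer with the plain string "received!"
print(requests.get(f"{base_url}/").text)

# "/get_models" is registered for POST and returns litellm.model_list
print(requests.post(f"{base_url}/get_models").text)
```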
## Deploying
1. Quick Start: Deploy on Railway
[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/DYqQAW?referralCode=t3ukrU)
2. `GCP`, `AWS`, `Azure`
This project includes a `Dockerfile`, so you can build the image and deploy it on your own cloud provider
# Support / Talk with founders
- [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
## Roadmap
- [ ] Support hosted db (e.g. Supabase)
- [ ] Easily send data to places like posthog and sentry.
- [ ] Add a hot-cache for project spend logs - enables fast checks for user + project limits
- [ ] Implement user-based rate-limiting
- [ ] Spending controls per project - expose key creation endpoint
- [ ] Need to store a keys db -> mapping created keys to their alias (i.e. project name)
- [ ] Easily add new models as backups / as the entry-point (add this to the available model list)

View file

@ -0,0 +1,21 @@
# import openai
# import os
# os.environ["OPENAI_API_KEY"] = ""
# openai.api_key = os.environ["OPENAI_API_KEY"]
# openai.api_base ="http://localhost:5000"
# messages = [
# {
# "role": "user",
# "content": "write a 1 pg essay in liteLLM"
# }
# ]
# response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages, stream=True)
# print("got response", response)
# # response is a generator
# for chunk in response:
# print(chunk)

View file

@ -1,53 +1,15 @@
from flask import Flask, request, jsonify, abort
from flask_cors import CORS
import traceback
import litellm
from litellm import completion
import os, dotenv
import json
dotenv.load_dotenv()
######### LOGGING ###################
# log your data to slack, supabase
litellm.success_callback=["slack", "supabase"] # set .env SLACK_API_TOKEN, SLACK_API_SECRET, SLACK_API_CHANNEL, SUPABASE
######### ERROR MONITORING ##########
# log errors to slack, sentry, supabase
litellm.failure_callback=["slack", "sentry", "supabase"] # .env SENTRY_API_URL
app = Flask(__name__)
CORS(app)
@app.route('/')
def index():
return 'received!', 200
@app.route('/chat/completions', methods=["POST"])
def api_completion():
data = request.json
try:
# pass in data to completion function, unpack data
response = completion(**data)
except Exception as e:
# call handle_error function
return handle_error(data)
return response, 200
@app.route('/get_models', methods=["POST"])
def get_models():
try:
return litellm.model_list
except Exception as e:
traceback.print_exc()
response = {"error": str(e)}
return response, 200
if __name__ == "__main__":
from waitress import serve
serve(app, host="0.0.0.0", port=5000, threads=500)
############### Advanced ##########################
########### streaming ############################
def generate_responses(response):
for chunk in response:
yield json.dumps({"response": chunk}) + "\n"
################ ERROR HANDLING #####################
# implement model fallbacks, cooldowns, and retries
# if a model fails assume it was rate limited and let it cooldown for 60s
@ -82,26 +44,6 @@ def handle_error(data):
############ Caching ###################################
# make a new endpoint with caching
# This Cache is built using ChromaDB
# it has two functions add_cache() and get_cache()
@app.route('/chat/completions', methods=["POST"])
def api_completion_with_cache():
data = request.json
try:
cache_response = get_cache(data['messages'])
if cache_response!=None:
return cache_response
# pass in data to completion function, unpack data
response = completion(**data)
# add to cache
except Exception as e:
# call handle_error function
return handle_error(data)
return response, 200
import uuid
cache_collection = None
# Add a response to the cache
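
The comments above describe the intended error-handling strategy: fall back to other models and treat a failure as a rate limit with a 60-second cooldown. The actual `handle_error` body is collapsed in this diff, so the snippet below is only a hypothetical illustration of that idea — the fallback list, the `last_failure` map, and the return shape are all assumptions, not the project's implementation:

```python
# Hypothetical illustration of the fallback/cooldown idea described in the comments
# above; none of these names or values come from the repo's actual handle_error.
import time
from litellm import completion

fallback_models = ["gpt-3.5-turbo", "claude-instant-1", "command-nightly"]
last_failure = {}  # model name -> timestamp of the most recent failure


def handle_error(data):
    failed_model = data.get("model")
    for model in fallback_models:
        if model == failed_model:
            continue  # this model already failed for the current request
        if time.time() - last_failure.get(model, 0) < 60:
            continue  # still cooling down; assume it was rate limited
        try:
            data["model"] = model
            return completion(**data), 200
        except Exception:
            last_failure[model] = time.time()
    return {"error": "all fallback models failed"}, 500
```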

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "0.1.400"
version = "0.1.436"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT License"